Spaces:

Anne31415
/

Public_BookBot

Sleeping

Anne31415 committed on Feb 6, 2024

Commit

8805e7e

verified ·

1 Parent(s): 16f7be1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -74,7 +74,7 @@ import chromadb
-# Function to extract text from a PDF file
 def extract_text_from_pdf(pdf_path):
     text = ""
     reader = PdfReader(pdf_path)
@@ -82,10 +82,11 @@ def extract_text_from_pdf(pdf_path):
         text += page.extract_text() + " "  # Concatenate text from each page
     return text
-# Example usage
 pdf_text = extract_text_from_pdf(pdf_path3)
 @st.cache_resource
 def load_vector_store(file_path, store_name, force_reload=False):
     local_repo_path = "Private_Book"
@@ -522,10 +523,18 @@ def page3():
         if not os.path.exists(pdf_path3):
             st.error("File not found. Please check the file path.")
             return
-        # Initialize CromA client and collection
         chroma_client = chromadb.Client()
-        collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
         # Add the extracted text from PDF to the Chroma collection
         collection.add(

+@st.cache
 def extract_text_from_pdf(pdf_path):
     text = ""
     reader = PdfReader(pdf_path)
         text += page.extract_text() + " "  # Concatenate text from each page
     return text
+# Use the function to get pdf_text
 pdf_text = extract_text_from_pdf(pdf_path3)
 @st.cache_resource
 def load_vector_store(file_path, store_name, force_reload=False):
     local_repo_path = "Private_Book"
         if not os.path.exists(pdf_path3):
             st.error("File not found. Please check the file path.")
             return
+        # Initialize Chroma client
         chroma_client = chromadb.Client()
+        # Check if the collection already exists
+        if "Kosten_Strukturdaten0602204" not in chroma_client.list_collections():
+            # Create the collection if it does not exist
+            collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
+        else:
+            # Use the existing collection if it already exists
+            collection = chroma_client.get_collection(name="Kosten_Strukturdaten0602204")
         # Add the extracted text from PDF to the Chroma collection
         collection.add(