Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -74,7 +74,7 @@ import chromadb
|
|
| 74 |
|
| 75 |
|
| 76 |
|
| 77 |
-
|
| 78 |
def extract_text_from_pdf(pdf_path):
|
| 79 |
text = ""
|
| 80 |
reader = PdfReader(pdf_path)
|
|
@@ -82,10 +82,11 @@ def extract_text_from_pdf(pdf_path):
|
|
| 82 |
text += page.extract_text() + " " # Concatenate text from each page
|
| 83 |
return text
|
| 84 |
|
| 85 |
-
#
|
| 86 |
pdf_text = extract_text_from_pdf(pdf_path3)
|
| 87 |
|
| 88 |
|
|
|
|
| 89 |
@st.cache_resource
|
| 90 |
def load_vector_store(file_path, store_name, force_reload=False):
|
| 91 |
local_repo_path = "Private_Book"
|
|
@@ -522,10 +523,18 @@ def page3():
|
|
| 522 |
if not os.path.exists(pdf_path3):
|
| 523 |
st.error("File not found. Please check the file path.")
|
| 524 |
return
|
| 525 |
-
|
| 526 |
-
# Initialize Chroma client
|
| 527 |
chroma_client = chromadb.Client()
|
| 528 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
|
| 530 |
# Add the extracted text from PDF to the Chroma collection
|
| 531 |
collection.add(
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
|
| 77 |
+
@st.cache
|
| 78 |
def extract_text_from_pdf(pdf_path):
|
| 79 |
text = ""
|
| 80 |
reader = PdfReader(pdf_path)
|
|
|
|
| 82 |
text += page.extract_text() + " " # Concatenate text from each page
|
| 83 |
return text
|
| 84 |
|
| 85 |
+
# Use the function to get pdf_text
|
| 86 |
pdf_text = extract_text_from_pdf(pdf_path3)
|
| 87 |
|
| 88 |
|
| 89 |
+
|
| 90 |
@st.cache_resource
|
| 91 |
def load_vector_store(file_path, store_name, force_reload=False):
|
| 92 |
local_repo_path = "Private_Book"
|
|
|
|
| 523 |
if not os.path.exists(pdf_path3):
|
| 524 |
st.error("File not found. Please check the file path.")
|
| 525 |
return
|
| 526 |
+
|
| 527 |
+
# Initialize Chroma client
|
| 528 |
chroma_client = chromadb.Client()
|
| 529 |
+
|
| 530 |
+
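# Check if the collection already exists (NOTE(review): depending on the chromadb version, list_collections() may return Collection objects rather than names, so a string membership test can always be True — verify, or use get_or_create_collection)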
|
| 531 |
+
if "Kosten_Strukturdaten0602204" not in chroma_client.list_collections():
|
| 532 |
+
# Create the collection if it does not exist
|
| 533 |
+
collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
|
| 534 |
+
else:
|
| 535 |
+
# Use the existing collection if it already exists
|
| 536 |
+
collection = chroma_client.get_collection(name="Kosten_Strukturdaten0602204")
|
| 537 |
+
|
| 538 |
|
| 539 |
# Add the extracted text from PDF to the Chroma collection
|
| 540 |
collection.add(
|