""" Enhanced document loader for 1000+ government documents Add this to your setup_documents.py or create as separate service """ import os import json from pathlib import Path def load_bulk_documents(): """Load documents from external sources""" # Example: Load from a documents directory documents = [] # Option 1: Load from JSON files docs_dir = Path("government_docs") # Create this directory if docs_dir.exists(): for json_file in docs_dir.glob("*.json"): with open(json_file, 'r', encoding='utf-8') as f: batch_docs = json.load(f) documents.extend(batch_docs) # Option 2: Load from text files text_docs_dir = Path("text_documents") if text_docs_dir.exists(): for txt_file in text_docs_dir.glob("*.txt"): with open(txt_file, 'r', encoding='utf-8') as f: content = f.read() documents.append({ "content": content, "filename": txt_file.name, "source": "Government Policy Manual" }) # Option 3: Load from PDF directory (requires PyPDF2) # pdf_docs_dir = Path("pdf_documents") # if pdf_docs_dir.exists(): # import PyPDF2 # for pdf_file in pdf_docs_dir.glob("*.pdf"): # # Extract text from PDF and add to documents return documents # Add this to your setup_sample_documents() function: # bulk_docs = load_bulk_documents() # SAMPLE_DOCUMENTS.extend(bulk_docs)