Spaces:
Sleeping
Sleeping
Enhanced startup logging to show actual document count (23K+ docs) instead of just 7 sample docs
67a99cd
| #!/usr/bin/env python3 | |
| """ | |
| Comprehensive analysis of the actual LanceDB database contents | |
| """ | |
| import sys | |
| import os | |
| import traceback | |
| sys.path.append(os.path.dirname(os.path.abspath(__file__))) | |
| try: | |
| import lancedb | |
| import pandas as pd | |
| from pathlib import Path | |
def _print_sample_rows(table, count, sample_limit=3):
    """Print a short preview of the first rows of *table*.

    Args:
        table: An opened table object exposing ``head(n)`` whose result
            provides ``to_pylist()`` (presumably a LanceDB table returning
            a PyArrow table -- confirm against the installed version).
        count: Total number of rows in the table, used to cap the sample
            size and shown in the heading.
        sample_limit: Maximum number of rows to preview.

    Any exception raised while fetching rows propagates to the caller,
    which decides how to report it.
    """
    sample_size = min(sample_limit, count)
    sample_data = table.head(sample_size).to_pylist()
    print(f" π Sample documents ({sample_size}/{count}):")
    for position, row in enumerate(sample_data, start=1):
        print(f" Document {position}:")
        # Show content preview, truncated to 200 characters.
        if 'content' in row:
            text = str(row['content'])  # stringify once instead of three times
            content = text[:200] + "..." if len(text) > 200 else text
            print(f" Content: {content}")
        # Show filename if available.
        if 'filename' in row:
            print(f" Filename: {row['filename']}")
        # Show any other non-empty field, skipping the ones handled above
        # and the 'vector' / 'id' columns.
        for key, value in row.items():
            if key not in ('content', 'filename', 'vector', 'id') and value:
                print(f" {key}: {str(value)[:100]}")
        print()


def analyze_lancedb_contents(db_path="./lancedb_data"):
    """Analyze the actual contents of the LanceDB database.

    Connects to the database at *db_path*, prints the row count, column
    names and a small sample for every table, then prints a summary.

    Args:
        db_path: Location passed to ``lancedb.connect``. Defaults to
            ``"./lancedb_data"``, the path this script always used.

    Returns:
        The total number of rows across all tables that could be read,
        or 0 if connecting to / listing the database failed.
    """
    print("π LanceDB Database Analysis")
    print("=" * 60)
    try:
        db = lancedb.connect(db_path)
        table_names = db.table_names()
        print(f"π Found {len(table_names)} tables: {table_names}")
        print()

        # Row counts of every table that was read successfully. The summary
        # below reuses these instead of re-opening tables, so a failure there
        # can no longer discard the totals gathered here.
        row_counts = {}
        for table_name in table_names:
            print(f"π Table: {table_name}")
            print("-" * 40)
            try:
                table = db.open_table(table_name)
                count = table.count_rows()
                row_counts[table_name] = count
                print(f" π Total rows: {count}")
                if count > 0:
                    # Get schema info (best effort: a failure is reported
                    # but does not stop the analysis of this table).
                    try:
                        schema = table.schema
                        print(f" π Columns: {[field.name for field in schema]}")
                    except Exception as e:
                        print(f" Could not read schema: {e}")
                    # Show sample data (best effort as well).
                    try:
                        _print_sample_rows(table, count)
                    except Exception as e:
                        print(f" β οΈ Could not read sample data: {e}")
                print()
            except Exception as e:
                print(f" β Error reading table {table_name}: {e}")
                print()

        total_documents = sum(row_counts.values())

        print("=" * 60)
        print("π― SUMMARY:")
        print(f" Total Documents Across All Tables: {total_documents}")
        print(f" Database Size: {'LARGE' if total_documents > 100 else 'MEDIUM' if total_documents > 10 else 'SMALL'}")
        # Check specifically for voice bot usage.
        if 'rajasthan_documents' in row_counts:
            print(f" Voice Bot Documents: {row_counts['rajasthan_documents']} (rajasthan_documents table)")
        if 'documents' in row_counts:
            print(f" General Documents: {row_counts['documents']} (documents table)")
        print()
        print("π€ Voice Bot Analysis:")
        if total_documents >= 1000:
            print(" β YES - Voice bot has access to 1000+ documents!")
        elif total_documents >= 100:
            print(" β οΈ PARTIAL - Voice bot has substantial documents but less than 1000")
        elif total_documents >= 10:
            print(" β οΈ LIMITED - Voice bot has moderate document access")
        else:
            print(" β MINIMAL - Voice bot has very limited document access")
        return total_documents
    except Exception as e:
        # Top-level boundary for the whole analysis: report and fall back to 0.
        print(f"β Error connecting to database: {e}")
        traceback.print_exc()
        return 0
# Script entry point: run the analysis and print the total it returned.
| if __name__ == "__main__": | |
| total = analyze_lancedb_contents() | |
| print(f"\nπ― Final Answer: Your voice bot has access to {total} documents") | |
# Handlers for the `try:` that wraps the third-party imports, the function
# definition and the entry point above: a failed import prints the install hint.
| except ImportError as e: | |
| print(f"β Missing dependencies: {e}") | |
| print("Please install: pip install lancedb pandas") | |
# Any other exception raised inside that `try:` is reported with its traceback.
| except Exception as e: | |
| print(f"β Unexpected error: {e}") | |
| traceback.print_exc() | |