File size: 3,967 Bytes
67a99cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python3
"""
Check how many documents are actually in the LanceDB database
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from lancedb_service import lancedb_service
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def check_document_count():
    """Print a row-count report for every table in the LanceDB database.

    The report has three parts:

    1. Every table name with its row count; tables whose name contains
       "document" also get a short preview of their first three rows.
    2. A closer look at the ``rajasthan_documents`` table: its row count and,
       for the first ten rows, which of the topics pension / leave / salary
       appear in the first 200 characters of the content.
    3. A summary based on the ``rajasthan_documents`` row count (treated as 0
       when that table does not exist).

    Returns:
        None. All output goes to stdout via ``print``.

    Raises:
        Nothing. A failure while inspecting a single table in part 1 is
        printed and the loop continues; any other failure is logged through
        the module logger and the function returns.
    """
    try:
        db = lancedb_service.db

        print("πŸ“Š Document Count Analysis")
        print("=" * 50)

        # Check all tables
        table_names = db.table_names()
        print(f"Available tables: {table_names}")
        print()

        for table_name in table_names:
            try:
                table = db.open_table(table_name)
                count = table.count_rows()
                print(f"πŸ“‹ {table_name}: {count} documents")

                # Show sample data for document tables
                if count > 0 and 'document' in table_name.lower():
                    print(f"   Sample documents from {table_name}:")
                    sample_rows = table.head(3).to_pylist()
                    for i, row in enumerate(sample_rows, start=1):
                        content = row.get('content', 'No content')
                        if content is None:
                            # A null content cell is treated like a missing one
                            # instead of crashing on len(None).
                            content = 'No content'
                        if len(content) > 100:
                            content_preview = content[:100] + "..."
                        else:
                            content_preview = content
                        filename = row.get('filename', 'No filename')
                        print(f"   - Document {i}: {filename}")
                        print(f"     Content: {content_preview}")
                    print()

            except Exception as e:
                # One unreadable table must not abort the rest of the report.
                print(f"❌ Error checking {table_name}: {e}")

        print("\nπŸ” Voice Bot Document Usage Analysis:")
        print("-" * 40)

        # Default to 0 so the summary below still works when the table is
        # missing (previously this path hit a NameError on raj_count).
        raj_count = 0

        # Check if voice bot is using documents
        if 'rajasthan_documents' in table_names:
            raj_table = db.open_table('rajasthan_documents')
            raj_count = raj_table.count_rows()
            print(f"βœ… Voice Bot has access to {raj_count} Rajasthan documents")

            if raj_count > 0:
                print("πŸ“„ Document topics include:")
                for doc in raj_table.head(10).to_pylist():
                    filename = doc.get('filename', 'Unknown')
                    # Only the first 200 characters are scanned for topics;
                    # a null content cell is treated as empty text.
                    snippet = (doc.get('content') or '')[:200].lower()
                    print(f"   β€’ {filename}")
                    for topic in ('pension', 'leave', 'salary'):
                        if topic in snippet:
                            print(f"     - Contains {topic} information βœ…")
        else:
            print("❌ No rajasthan_documents table found!")

        # Check regular documents table
        if 'documents' in table_names:
            doc_table = db.open_table('documents')
            doc_count = doc_table.count_rows()
            print(f"πŸ“š General documents table: {doc_count} documents")

        usage_status = 'βœ… USING' if raj_count > 0 else '❌ NOT USING'
        coverage_status = 'βœ… GOOD' if raj_count >= 5 else '⚠️ LIMITED'

        print("\nπŸ“ Summary:")
        print(f"- The voice bot is {usage_status} the document database")
        print(f"- Total accessible documents: {raj_count}")
        print(f"- This is {coverage_status} for comprehensive responses")

        if raj_count < 100:
            print(f"\nπŸ’‘ Note: You mentioned 1000+ documents, but only {raj_count} are currently loaded.")
            print("   Consider adding more documents to improve response quality.")

    except Exception as e:
        # Top-level boundary of a diagnostic script: log with traceback
        # rather than letting the script die with an unhandled exception.
        logger.exception("❌ Error checking document count: %s", e)

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    check_document_count()