Spaces:
Sleeping
Sleeping
Commit
·
ecd279c
1
Parent(s):
4a1bc78
Add 899999999999999999999999999
Browse files- rag_service.py +23 -9
rag_service.py
CHANGED
|
@@ -431,6 +431,8 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 431 |
enhanced_query = f"{query} pension rules retirement benefits modifications"
|
| 432 |
elif "calculation" in query_lower or "formula" in query_lower:
|
| 433 |
enhanced_query = f"{query} pension calculation retirement benefits formula"
|
|
|
|
|
|
|
| 434 |
|
| 435 |
# Leave queries
|
| 436 |
elif any(word in query_lower for word in ["leave", "casual", "earned"]):
|
|
@@ -539,7 +541,7 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 539 |
'financial', 'fund', 'payment', 'authorization', 'delegation', 'registers'],
|
| 540 |
'leadership_policymaker': ['policy', 'impact', 'scenario', 'analysis', 'evidence', 'comparative',
|
| 541 |
'evaluation', 'decision', 'strategic', 'implementation', 'assessment'],
|
| 542 |
-
'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'superannuation', 'benefits'],
|
| 543 |
'leave': ['leave', 'casual', 'earned', 'medical leave', 'maternity'],
|
| 544 |
'allowance': ['allowance', 'dearness', 'house rent', 'travel', 'da', 'hra', 'increment'],
|
| 545 |
'procurement': ['procurement', 'tender', 'bidding', 'contract', 'vendor', 'gem', 'purchase'],
|
|
@@ -566,21 +568,33 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 566 |
content_matches = [kw for kw in matching_content_keywords if kw in content]
|
| 567 |
if content_matches:
|
| 568 |
relevance_score += 1.5 # Strong boost for matching content
|
| 569 |
-
logger.
|
|
|
|
|
|
|
| 570 |
|
| 571 |
-
#
|
| 572 |
-
for
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
|
|
|
|
|
|
| 576 |
|
| 577 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
|
| 579 |
-
if relevance_score >
|
|
|
|
| 580 |
# Add relevance score to document (create dict if needed)
|
| 581 |
if hasattr(doc, 'metadata'):
|
| 582 |
doc.metadata['relevance_score'] = relevance_score
|
| 583 |
filtered_docs.append(doc)
|
|
|
|
|
|
|
| 584 |
|
| 585 |
# Sort by relevance score and limit results
|
| 586 |
filtered_docs = sorted(filtered_docs, key=lambda x: getattr(x, 'metadata', {}).get('relevance_score', 0), reverse=True)[:limit]
|
|
|
|
| 431 |
enhanced_query = f"{query} pension rules retirement benefits modifications"
|
| 432 |
elif "calculation" in query_lower or "formula" in query_lower:
|
| 433 |
enhanced_query = f"{query} pension calculation retirement benefits formula"
|
| 434 |
+
elif any(word in query_lower for word in ["old age", "elderly", "senior", "old"]):
|
| 435 |
+
enhanced_query = f"{query} pension retirement benefits elderly old age senior citizen"
|
| 436 |
|
| 437 |
# Leave queries
|
| 438 |
elif any(word in query_lower for word in ["leave", "casual", "earned"]):
|
|
|
|
| 541 |
'financial', 'fund', 'payment', 'authorization', 'delegation', 'registers'],
|
| 542 |
'leadership_policymaker': ['policy', 'impact', 'scenario', 'analysis', 'evidence', 'comparative',
|
| 543 |
'evaluation', 'decision', 'strategic', 'implementation', 'assessment'],
|
| 544 |
+
'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'superannuation', 'benefits', 'old age', 'elderly', 'senior'],
|
| 545 |
'leave': ['leave', 'casual', 'earned', 'medical leave', 'maternity'],
|
| 546 |
'allowance': ['allowance', 'dearness', 'house rent', 'travel', 'da', 'hra', 'increment'],
|
| 547 |
'procurement': ['procurement', 'tender', 'bidding', 'contract', 'vendor', 'gem', 'purchase'],
|
|
|
|
| 568 |
content_matches = [kw for kw in matching_content_keywords if kw in content]
|
| 569 |
if content_matches:
|
| 570 |
relevance_score += 1.5 # Strong boost for matching content
|
| 571 |
+
logger.info(f"✅ Content match found: {content_matches} for category: {detected_query_category}, New score: {relevance_score:.2f}")
|
| 572 |
+
else:
|
| 573 |
+
logger.info(f"❌ No content match for category: {detected_query_category}, checking keywords: {matching_content_keywords}")
|
| 574 |
|
| 575 |
+
# Apply lighter penalties for documents from different categories
|
| 576 |
+
# Only penalize if no matching content was found for the main category
|
| 577 |
+
if not content_matches:
|
| 578 |
+
for other_category, other_keywords in content_categories.items():
|
| 579 |
+
if other_category != detected_query_category:
|
| 580 |
+
if any(keyword in content for keyword in other_keywords):
|
| 581 |
+
relevance_score -= 0.3 # Light penalty only when no main category match
|
| 582 |
|
| 583 |
+
logger.info(f"π Query category: {detected_query_category}, Final relevance: {relevance_score:.2f} for content: {content[:50]}...")
|
| 584 |
+
|
| 585 |
+
# Use different thresholds based on query type
|
| 586 |
+
threshold = 0.1 # Default threshold
|
| 587 |
+
if any(keyword in query_lower for keyword in ['pension', 'retirement', 'old age']):
|
| 588 |
+
threshold = -0.2 # Very inclusive for pension queries
|
| 589 |
|
| 590 |
+
if relevance_score > threshold: # Adaptive threshold for relevant documents
|
| 591 |
+
logger.info(f"✅ Document PASSED filter: score {relevance_score:.2f} > threshold {threshold}")
|
| 592 |
# Add relevance score to document (create dict if needed)
|
| 593 |
if hasattr(doc, 'metadata'):
|
| 594 |
doc.metadata['relevance_score'] = relevance_score
|
| 595 |
filtered_docs.append(doc)
|
| 596 |
+
else:
|
| 597 |
+
logger.info(f"❌ Document FAILED filter: score {relevance_score:.2f} <= threshold {threshold}")
|
| 598 |
|
| 599 |
# Sort by relevance score and limit results
|
| 600 |
filtered_docs = sorted(filtered_docs, key=lambda x: getattr(x, 'metadata', {}).get('relevance_score', 0), reverse=True)[:limit]
|