ChAbhishek28 committed on
Commit
ecd279c
·
1 Parent(s): 4a1bc78

Add 899999999999999999999999999

Browse files
Files changed (1) hide show
  1. rag_service.py +23 -9
rag_service.py CHANGED
@@ -431,6 +431,8 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
431
  enhanced_query = f"{query} pension rules retirement benefits modifications"
432
  elif "calculation" in query_lower or "formula" in query_lower:
433
  enhanced_query = f"{query} pension calculation retirement benefits formula"
 
 
434
 
435
  # Leave queries
436
  elif any(word in query_lower for word in ["leave", "casual", "earned"]):
@@ -539,7 +541,7 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
539
  'financial', 'fund', 'payment', 'authorization', 'delegation', 'registers'],
540
  'leadership_policymaker': ['policy', 'impact', 'scenario', 'analysis', 'evidence', 'comparative',
541
  'evaluation', 'decision', 'strategic', 'implementation', 'assessment'],
542
- 'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'superannuation', 'benefits'],
543
  'leave': ['leave', 'casual', 'earned', 'medical leave', 'maternity'],
544
  'allowance': ['allowance', 'dearness', 'house rent', 'travel', 'da', 'hra', 'increment'],
545
  'procurement': ['procurement', 'tender', 'bidding', 'contract', 'vendor', 'gem', 'purchase'],
@@ -566,21 +568,33 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
566
  content_matches = [kw for kw in matching_content_keywords if kw in content]
567
  if content_matches:
568
  relevance_score += 1.5 # Strong boost for matching content
569
- logger.debug(f"✅ Content match found: {content_matches} for category: {detected_query_category}")
 
 
570
 
571
- # Penalize documents from different categories
572
- for other_category, other_keywords in content_categories.items():
573
- if other_category != detected_query_category:
574
- if any(keyword in content for keyword in other_keywords):
575
- relevance_score -= 0.8 # Moderate penalty for non-matching content
 
 
576
 
577
- logger.debug(f"Query category: {detected_query_category}, Relevance: {relevance_score:.2f} for content: {content[:50]}...")
 
 
 
 
 
578
 
579
- if relevance_score > 0.1: # More inclusive threshold for relevant documents
 
580
  # Add relevance score to document (create dict if needed)
581
  if hasattr(doc, 'metadata'):
582
  doc.metadata['relevance_score'] = relevance_score
583
  filtered_docs.append(doc)
 
 
584
 
585
  # Sort by relevance score and limit results
586
  filtered_docs = sorted(filtered_docs, key=lambda x: getattr(x, 'metadata', {}).get('relevance_score', 0), reverse=True)[:limit]
 
431
  enhanced_query = f"{query} pension rules retirement benefits modifications"
432
  elif "calculation" in query_lower or "formula" in query_lower:
433
  enhanced_query = f"{query} pension calculation retirement benefits formula"
434
+ elif any(word in query_lower for word in ["old age", "elderly", "senior", "old"]):
435
+ enhanced_query = f"{query} pension retirement benefits elderly old age senior citizen"
436
 
437
  # Leave queries
438
  elif any(word in query_lower for word in ["leave", "casual", "earned"]):
 
541
  'financial', 'fund', 'payment', 'authorization', 'delegation', 'registers'],
542
  'leadership_policymaker': ['policy', 'impact', 'scenario', 'analysis', 'evidence', 'comparative',
543
  'evaluation', 'decision', 'strategic', 'implementation', 'assessment'],
544
+ 'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'superannuation', 'benefits', 'old age', 'elderly', 'senior'],
545
  'leave': ['leave', 'casual', 'earned', 'medical leave', 'maternity'],
546
  'allowance': ['allowance', 'dearness', 'house rent', 'travel', 'da', 'hra', 'increment'],
547
  'procurement': ['procurement', 'tender', 'bidding', 'contract', 'vendor', 'gem', 'purchase'],
 
568
  content_matches = [kw for kw in matching_content_keywords if kw in content]
569
  if content_matches:
570
  relevance_score += 1.5 # Strong boost for matching content
571
+ logger.info(f"✅ Content match found: {content_matches} for category: {detected_query_category}, New score: {relevance_score:.2f}")
572
+ else:
573
+ logger.info(f"❌ No content match for category: {detected_query_category}, checking keywords: {matching_content_keywords}")
574
 
575
+ # Apply lighter penalties for documents from different categories
576
+ # Only penalize if no matching content was found for the main category
577
+ if not content_matches:
578
+ for other_category, other_keywords in content_categories.items():
579
+ if other_category != detected_query_category:
580
+ if any(keyword in content for keyword in other_keywords):
581
+ relevance_score -= 0.3 # Light penalty only when no main category match
582
 
583
+ logger.info(f"📊 Query category: {detected_query_category}, Final relevance: {relevance_score:.2f} for content: {content[:50]}...")
584
+
585
+ # Use different thresholds based on query type
586
+ threshold = 0.1 # Default threshold
587
+ if any(keyword in query_lower for keyword in ['pension', 'retirement', 'old age']):
588
+ threshold = -0.2 # Very inclusive for pension queries
589
 
590
+ if relevance_score > threshold: # Adaptive threshold for relevant documents
591
+ logger.info(f"✅ Document PASSED filter: score {relevance_score:.2f} > threshold {threshold}")
592
  # Add relevance score to document (create dict if needed)
593
  if hasattr(doc, 'metadata'):
594
  doc.metadata['relevance_score'] = relevance_score
595
  filtered_docs.append(doc)
596
+ else:
597
+ logger.info(f"❌ Document FAILED filter: score {relevance_score:.2f} <= threshold {threshold}")
598
 
599
  # Sort by relevance score and limit results
600
  filtered_docs = sorted(filtered_docs, key=lambda x: getattr(x, 'metadata', {}).get('relevance_score', 0), reverse=True)[:limit]