Spaces:
Sleeping
Sleeping
Commit ·
5d371c7
1
Parent(s): 3df14a5
Add 899999999
Browse files- rag_service.py +13 -8
rag_service.py
CHANGED
|
@@ -203,7 +203,10 @@ async def search_docs(query: str, config: RunnableConfig) -> str:
|
|
| 203 |
|
| 204 |
try:
|
| 205 |
# Search in the specified knowledge base
|
|
|
|
|
|
|
| 206 |
docs = await lancedb_service.similarity_search(query, userid, knowledge_base)
|
|
|
|
| 207 |
if docs:
|
| 208 |
# Advanced extractive summarization using NLTK
|
| 209 |
try:
|
|
@@ -212,25 +215,23 @@ async def search_docs(query: str, config: RunnableConfig) -> str:
|
|
| 212 |
from nltk.tokenize import sent_tokenize
|
| 213 |
except ImportError:
|
| 214 |
sent_tokenize = lambda x: x.split('.')
|
| 215 |
-
|
| 216 |
# Embedding-based chunk selection
|
| 217 |
try:
|
| 218 |
from sentence_transformers import SentenceTransformer
|
| 219 |
embedder = SentenceTransformer('all-MiniLM-L6-v2')
|
| 220 |
except ImportError:
|
| 221 |
embedder = None
|
| 222 |
-
|
| 223 |
def select_best_chunk(chunks, query):
|
| 224 |
if not embedder or not chunks:
|
| 225 |
return chunks[0] if chunks else ""
|
| 226 |
chunk_embeddings = embedder.encode(chunks)
|
| 227 |
query_embedding = embedder.encode([query])[0]
|
| 228 |
-
# Cosine similarity
|
| 229 |
import numpy as np
|
| 230 |
scores = [np.dot(chunk_emb, query_embedding)/(np.linalg.norm(chunk_emb)*np.linalg.norm(query_embedding)) for chunk_emb in chunk_embeddings]
|
| 231 |
best_idx = int(np.argmax(scores))
|
| 232 |
return chunks[best_idx]
|
| 233 |
-
|
| 234 |
def extractive_summary(text, max_sentences=3):
|
| 235 |
sentences = sent_tokenize(text)
|
| 236 |
keywords = query.lower().split()
|
|
@@ -238,24 +239,28 @@ async def search_docs(query: str, config: RunnableConfig) -> str:
|
|
| 238 |
if scored:
|
| 239 |
return ' '.join(scored[:max_sentences])
|
| 240 |
return ' '.join(sentences[:max_sentences])
|
| 241 |
-
|
| 242 |
compressed_contexts = []
|
| 243 |
for doc in docs:
|
| 244 |
-
# Chunking: split by paragraphs or headings
|
| 245 |
if hasattr(doc, 'chunks') and doc.chunks:
|
| 246 |
best_chunk = select_best_chunk(doc.chunks, query)
|
| 247 |
summary = extractive_summary(best_chunk)
|
| 248 |
else:
|
| 249 |
-
# Fallback: split by paragraphs
|
| 250 |
paragraphs = doc.page_content.split('\n\n')
|
| 251 |
best_chunk = select_best_chunk(paragraphs, query) if paragraphs else doc.page_content
|
| 252 |
summary = extractive_summary(best_chunk)
|
| 253 |
compressed_contexts.append(summary)
|
| 254 |
-
|
| 255 |
context = "\n\n".join(compressed_contexts)
|
|
|
|
|
|
|
|
|
|
| 256 |
return f"📄 Found {len(docs)} relevant documents (chunked & summarized):\n\n{context}"
|
| 257 |
else:
|
| 258 |
context = ""
|
|
|
|
|
|
|
|
|
|
| 259 |
return "No relevant documents found in the knowledge base."
|
| 260 |
except Exception as e:
|
| 261 |
logger.error(f"❌ Error searching documents: {e}")
|
|
|
|
| 203 |
|
| 204 |
try:
|
| 205 |
# Search in the specified knowledge base
|
| 206 |
+
import time
|
| 207 |
+
t0 = time.time()
|
| 208 |
docs = await lancedb_service.similarity_search(query, userid, knowledge_base)
|
| 209 |
+
t1 = time.time()
|
| 210 |
if docs:
|
| 211 |
# Advanced extractive summarization using NLTK
|
| 212 |
try:
|
|
|
|
| 215 |
from nltk.tokenize import sent_tokenize
|
| 216 |
except ImportError:
|
| 217 |
sent_tokenize = lambda x: x.split('.')
|
| 218 |
+
t2 = time.time()
|
| 219 |
# Embedding-based chunk selection
|
| 220 |
try:
|
| 221 |
from sentence_transformers import SentenceTransformer
|
| 222 |
embedder = SentenceTransformer('all-MiniLM-L6-v2')
|
| 223 |
except ImportError:
|
| 224 |
embedder = None
|
| 225 |
+
t3 = time.time()
|
| 226 |
def select_best_chunk(chunks, query):
    """Return the chunk whose embedding is most similar to ``query``.

    Similarity is the cosine between each chunk embedding and the query
    embedding, both produced by ``embedder.encode``. ``embedder`` is read
    from the enclosing scope.

    Args:
        chunks: Sequence of text chunks to choose from.
        query: The search query text.

    Returns:
        The best-matching chunk. Falls back to the first chunk when no
        embedder is available, and to ``""`` when ``chunks`` is empty.
    """
    if not chunks:
        return ""
    if not embedder:
        return chunks[0]

    import numpy as np

    chunk_embeddings = np.asarray(embedder.encode(chunks), dtype=float)
    query_embedding = np.asarray(embedder.encode([query])[0], dtype=float)

    # Cosine similarity for all chunks at once.
    dot_products = chunk_embeddings @ query_embedding
    norm_products = (
        np.linalg.norm(chunk_embeddings, axis=1) * np.linalg.norm(query_embedding)
    )

    # A zero-norm embedding would make the division produce NaN, and
    # np.argmax returns the first NaN it sees; score such chunks as -inf so
    # they can never outrank a chunk with a real similarity value.
    scores = np.full(len(chunk_embeddings), -np.inf)
    np.divide(dot_products, norm_products, out=scores, where=norm_products > 0)

    return chunks[int(np.argmax(scores))]
|
|
|
|
| 235 |
def extractive_summary(text, max_sentences=3):
|
| 236 |
sentences = sent_tokenize(text)
|
| 237 |
keywords = query.lower().split()
|
|
|
|
| 239 |
if scored:
|
| 240 |
return ' '.join(scored[:max_sentences])
|
| 241 |
return ' '.join(sentences[:max_sentences])
|
| 242 |
+
t4 = time.time()
|
| 243 |
compressed_contexts = []
|
| 244 |
for doc in docs:
|
|
|
|
| 245 |
if hasattr(doc, 'chunks') and doc.chunks:
|
| 246 |
best_chunk = select_best_chunk(doc.chunks, query)
|
| 247 |
summary = extractive_summary(best_chunk)
|
| 248 |
else:
|
|
|
|
| 249 |
paragraphs = doc.page_content.split('\n\n')
|
| 250 |
best_chunk = select_best_chunk(paragraphs, query) if paragraphs else doc.page_content
|
| 251 |
summary = extractive_summary(best_chunk)
|
| 252 |
compressed_contexts.append(summary)
|
| 253 |
+
t5 = time.time()
|
| 254 |
context = "\n\n".join(compressed_contexts)
|
| 255 |
+
t6 = time.time()
|
| 256 |
+
import logging
|
| 257 |
+
logging.info(f"[Latency] Document search: {t1-t0:.3f}s, NLTK setup: {t2-t1:.3f}s, Embedding setup: {t3-t2:.3f}s, Function setup: {t4-t3:.3f}s, Chunking/summarization: {t5-t4:.3f}s, Context join: {t6-t5:.3f}s, Total: {t6-t0:.3f}s")
|
| 258 |
return f"📄 Found {len(docs)} relevant documents (chunked & summarized):\n\n{context}"
|
| 259 |
else:
|
| 260 |
context = ""
|
| 261 |
+
t7 = time.time()
|
| 262 |
+
import logging
|
| 263 |
+
logging.info(f"[Latency] Document search: {t1-t0:.3f}s, No docs found, Total: {t7-t0:.3f}s")
|
| 264 |
return "No relevant documents found in the knowledge base."
|
| 265 |
except Exception as e:
|
| 266 |
logger.error(f"❌ Error searching documents: {e}")
|