Ahmed-El-Sharkawy commited on
Commit
d128e02
·
verified ·
1 Parent(s): 7d4ed22

Upload the app

Browse files
Files changed (2) hide show
  1. app_gradio.py +221 -0
  2. requirements.txt +5 -0
app_gradio.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os, time, sys, asyncio
from typing import List, Dict
import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI

# ---- Windows event loop fix ----
# Select the Proactor policy so async features behave on Windows consoles.
if sys.platform.startswith("win"):
    try:
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
    except Exception:
        pass

# ---- Environment-driven configuration ----
load_dotenv()
API_KEY = os.getenv("API_KEY")
HOST = os.getenv("HOST")
Embed_Model_Name = os.getenv("EMBEDDING_MODEL_NAME")
Reranker_Model_Name = os.getenv("RERANKER_MODEL_NAME")
K = int(os.getenv("K", "8"))          # candidates retrieved from the vector store
TOP_N = int(os.getenv("TOP_N", "5"))  # candidates kept after reranking

# ---- Throttling / embedding constants ----
RPM_LIMIT = 2
MIN_SECONDS_BETWEEN = 30
N_DIM = 384

# ---- OpenAI-compatible client (only created when a key is configured) ----
client = None
if API_KEY:
    client = OpenAI(api_key=API_KEY, base_url="https://genai.ghaymah.systems")

# ---- Project RAG components ----
from embedder import EmbeddingModel
from Reranker import Reranker
35
+
36
def safe_chat_complete(model: str, messages: List[Dict], max_tokens: int = 9000) -> str:
    """Call the chat-completions endpoint with bounded retries on rate limits.

    Retries up to three times (5s / 10s / 20s back-off) when the provider
    error message mentions "429" or "Rate Limit"; any other exception is
    re-raised immediately, as is a rate-limit error once retries are spent.

    Args:
        model: Model identifier passed straight to the API.
        messages: Chat history in OpenAI message-dict format.
        max_tokens: Completion token cap.

    Returns:
        The assistant message content of the first choice.

    Raises:
        RuntimeError: if no API client was configured (missing API_KEY).
    """
    if client is None:
        # Fail fast with a clear message instead of an AttributeError on None.
        raise RuntimeError("OpenAI client is not configured; set API_KEY in your .env")

    delays = [5, 10, 20]  # back-off schedule, one entry per retry
    attempt = 0
    while True:
        try:
            resp = client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.3,
                timeout=60,
            )
            return resp.choices[0].message.content
        except Exception as e:
            msg = str(e)
            if "429" in msg or "Rate Limit" in msg:
                if attempt < len(delays):
                    time.sleep(delays[attempt])
                    attempt += 1
                    continue
            raise
56
+
57
def build_single_system_context(query: str, max_total_chars: int = 9000, k: int = 10) -> str:
    """Build one system prompt from retrieved + reranked context snippets.

    Retrieves up to *k* candidate chunks from the remote vector store,
    reranks them to the global TOP_N, then greedily packs whole snippets
    into *max_total_chars* (header included) and appends a de-duplicated
    "Sources:" footer.

    Args:
        query: The user question to retrieve context for.
        max_total_chars: Character budget for header + snippets.
        k: Number of candidates to retrieve before reranking.

    Returns:
        The assembled system prompt, or a strict fallback instruction when
        nothing was retrieved.
    """
    # Construct the (expensive) embedder/reranker once and reuse them;
    # previously they were re-instantiated on every single query.
    models = getattr(build_single_system_context, "_models", None)
    if models is None:
        models = (
            EmbeddingModel(model_name=Embed_Model_Name),
            Reranker(model_name=Reranker_Model_Name),
        )
        build_single_system_context._models = models
    embedder, reranker = models

    results = embedder.retrieve_top_k_remote_texts(query, k=k, HOST=HOST)
    top_results = reranker.rerank_results(query, results, top_n=TOP_N)

    snippets, sources = [], []
    for p in top_results:
        txt = (p.get("text") or "").strip()
        if not txt:
            continue
        src = p.get("source")
        if isinstance(src, str) and src:
            sources.append(src)
        snippets.append(txt)

    if not snippets:
        return ("You are a strict RAG assistant. No context was retrieved from the vector store for this query. "
                "If the answer is not present, say you don’t know.")

    header = ("You are a strict RAG assistant. Answer ONLY using the provided context snippets. "
              "If the answer is not present, say you don’t know. ")
    # Greedily pack whole snippets until the character budget is spent.
    body_budget = max_total_chars - len(header)
    body_parts, used = [], 0
    for snip in snippets:
        piece = snip + "\n\n"
        if used + len(piece) > body_budget:
            break
        body_parts.append(piece)
        used += len(piece)

    # De-duplicate sources while preserving first-seen order.
    seen, uniq_sources = set(), []
    for s in sources:
        if s not in seen:
            uniq_sources.append(s)
            seen.add(s)
    footer = "Sources:\n" + "\n".join(f"- {s}" for s in uniq_sources) + "\n" if uniq_sources else ""
    return (header + "".join(body_parts) + footer).strip()
91
+
92
# Seed instruction placed at the head of every new conversation.
SYSTEM_SEED = "You are a strict RAG assistant. Answer ONLY using the provided context."

def init_state():
    """Return a fresh per-session state: seeded history, no last-call timestamp."""
    return {
        "messages": [{"role": "system", "content": SYSTEM_SEED}],
        "last_call_ts": None,
    }
95
+
96
def can_call_now(state: dict) -> bool:
    """Return True when the cooldown since the previous API call has elapsed.

    A session that has never called the API (no timestamp) may always call.
    """
    last = state.get("last_call_ts")
    if last is None:
        return True
    return (time.time() - last) >= MIN_SECONDS_BETWEEN
99
+
100
def record_call_time(state: dict):
    """Stamp *state* with the wall-clock time of the API call just made."""
    now = time.time()
    state["last_call_ts"] = now
102
+
103
def respond(user_message: str, state: dict):
    """Produce an assistant reply for *user_message* and update *state*.

    Checks configuration, enforces the per-session cooldown, builds a fresh
    RAG system prompt, and calls the chat model. Always returns a
    (reply_text, state) pair so the UI has something to display.
    """
    # Basic env checks – we still show a bot response so the UI proves it’s working
    missing = [name for name, value in (
        ("API_KEY", API_KEY),
        ("HOST", HOST),
        ("EMBEDDING_MODEL_NAME", Embed_Model_Name),
        ("RERANKER_MODEL_NAME", Reranker_Model_Name),
    ) if not value]
    if missing:
        return (f"Config missing: {', '.join(missing)}. Set them in your .env and restart."), state

    state["messages"].append({"role": "user", "content": user_message})

    # Enforce the minimum spacing between model calls.
    if not can_call_now(state):
        elapsed = time.time() - (state.get("last_call_ts") or 0)
        remaining = max(1, int(MIN_SECONDS_BETWEEN - elapsed))
        msg = f"Rate limit in effect. Please wait ~{remaining} seconds."
        state["messages"].append({"role": "assistant", "content": msg})
        return msg, state

    # Fresh RAG context plus the last 10 non-system turns of history.
    rag_ctx = build_single_system_context(query=user_message, max_total_chars=5000, k=K)
    msgs = [{"role": "system", "content": rag_ctx}]
    msgs.extend([m for m in state["messages"] if m["role"] != "system"][-10:])

    try:
        reply = safe_chat_complete("DeepSeek-V3-0324", msgs, max_tokens=1000)
        record_call_time(state)
    except Exception as e:
        reply = f"Request failed: {e}"

    state["messages"].append({"role": "assistant", "content": reply})
    return reply, state
134
+
135
# ------------------- Gradio UI: messages API (Gradio >= 5) -------------------
with gr.Blocks(title="Ghaymah Chatbot (Gradio)") as demo:
    gr.Markdown("# 🤖 Ghaymah Chatbot (Gradio)")
    gr.Markdown(
        "Vector store: **Connected** \n"
        f"Embedder: `{Embed_Model_Name or 'unset'}` \n"
        f"RPM limit: **{RPM_LIMIT}** (min {MIN_SECONDS_BETWEEN}s between calls) \n"
        f"Gradio version: `{gr.__version__}`"
    )

    # Per-session backend state: {"messages": [...], "last_call_ts": ...}
    state = gr.State(init_state())

    # Start with an explicit empty list so the component value is never None.
    chatbot = gr.Chatbot(label="Chat", height=520, type="messages", value=[])

    with gr.Row():
        txt = gr.Textbox(
            placeholder="Ask anything about the Ghaymah documentation…",
            label="Your message",
            lines=2,
            autofocus=True,
        )
    with gr.Row():
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear")

    # Step 1: echo the user message into the chat immediately.
    def _on_user_submit(user_input, chat_messages):
        try:
            if not user_input:
                return "", (chat_messages or [])
            chat_messages = chat_messages or []  # guard for None
            updated = chat_messages + [{"role": "user", "content": user_input}]
            print("[on_submit] user:", user_input)
            return "", updated
        except Exception as e:
            print("[on_submit][ERROR]", repr(e))
            # keep textbox text so you can retry; don't mutate chat on error
            return user_input, (chat_messages or [])

    # Step 2: call the backend and append the assistant message.
    def _bot_step(chat_messages, state):
        try:
            chat_messages = chat_messages or []
            last_user = None
            for msg in reversed(chat_messages):
                if msg.get("role") == "user" and isinstance(msg.get("content"), str):
                    last_user = msg["content"]
                    break
            if last_user is None:
                print("[bot_step] no user message found")
                return chat_messages, state

            print("[bot_step] responding to:", last_user)
            bot_reply, new_state = respond(last_user, state)

            updated = chat_messages + [{"role": "assistant", "content": bot_reply}]
            return updated, new_state

        except Exception as e:
            print("[bot_step][ERROR]", repr(e))
            # show the error in the chat so you see *something* in the UI
            updated = (chat_messages or []) + [
                {"role": "assistant", "content": f"⚠️ Internal error: {e}"}
            ]
            return updated, state

    # BUG FIX: the handlers used to be registered twice — once standalone
    # and once with the .then(...) chain — so every Enter/Send appended the
    # user message to the chat twice and double-triggered the pipeline.
    # Register each trigger exactly once, chained to the bot step.
    txt.submit(_on_user_submit, [txt, chatbot], [txt, chatbot])\
        .then(_bot_step, [chatbot, state], [chatbot, state])
    send_btn.click(_on_user_submit, [txt, chatbot], [txt, chatbot])\
        .then(_bot_step, [chatbot, state], [chatbot, state])

    def _clear():
        print("[clear] resetting state and chat")
        return [], init_state()

    clear_btn.click(_clear, outputs=[chatbot, state])
218
+
219
if __name__ == "__main__":
    # queue() serializes event handling; debug=True surfaces tracebacks in the UI.
    demo.queue()
    demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai
2
+ python-dotenv
3
+ sentence-transformers
4
+ hf_xet
5
+ gradio