ZENLLC committed
Commit 3d09d18 · verified · 1 Parent(s): 71a05eb

Create app.py

Files changed (1)
app.py +222 -0
app.py ADDED
@@ -0,0 +1,222 @@
+ import os
+ import time
+ import base64
+ from io import BytesIO
+
+ import gradio as gr
+ from dotenv import load_dotenv
+
+ # ---- Load .env if present (not required) ----
+ load_dotenv()
+
+ # Lazy imports so the Space can boot even if one SDK is missing
+ def _lazy_import_openai():
+     try:
+         from openai import OpenAI
+         return OpenAI
+     except Exception as e:
+         raise RuntimeError(f"OpenAI SDK not available: {e}")
+
+ def _lazy_import_gemini():
+     try:
+         import google.generativeai as genai
+         return genai
+     except Exception as e:
+         raise RuntimeError(f"Google Generative AI SDK not available: {e}")
+
+ APP_TITLE = "ZEN Dual-Engine AI — GPT-5 + Nano-Banana (Gemini)"
+ SYSTEM_DEFAULT = (
+     "You are ZEN Assistant. Be concise, accurate, and actionable. "
+     "Cite sources when asked. Avoid sensitive data. If an image is provided, describe or analyze it clearly."
+ )
+
+ # ---- Core model callers ----
+ def call_openai(api_key: str, model: str, messages: list, temperature: float = 0.4, max_tokens: int = 1024):
+     OpenAI = _lazy_import_openai()
+     client = OpenAI(api_key=api_key)
+     # OpenAI chat.completions remains widely supported across models
+     resp = client.chat.completions.create(
+         model=model,
+         messages=messages,
+         temperature=temperature,
+         max_tokens=max_tokens,
+     )
+     return resp.choices[0].message.content
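+ # Shape of the messages list this helper expects (illustrative values, not live data):
+ # [{"role": "system", "content": "You are ZEN Assistant..."},
+ #  {"role": "user", "content": "Hello"},
+ #  {"role": "assistant", "content": "Hi! How can I help?"}]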
+
+ def _pil_to_base64(image):
+     buffer = BytesIO()
+     image.convert("RGB").save(buffer, format="JPEG", quality=90)
+     return base64.b64encode(buffer.getvalue()).decode("utf-8")
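+ # NOTE: _pil_to_base64 is currently unused; the OpenAI branch below ignores images.
+ # A hypothetical hookup would embed the result as a data URL in a vision-style
+ # content part, e.g. (sketch only, not wired in):
+ # {"role": "user", "content": [
+ #     {"type": "text", "text": user_message},
+ #     {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{_pil_to_base64(image)}"}},
+ # ]}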
+
+ def call_gemini(api_key: str, model: str, user_text: str, system_prompt: str, image=None, temperature: float = 0.4):
+     genai = _lazy_import_gemini()
+     genai.configure(api_key=api_key)
+     safety_settings = [
+         {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
+         {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
+         {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
+         {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
+     ]
+     generation_config = {
+         "temperature": temperature,
+     }
+     model_obj = genai.GenerativeModel(
+         model_name=model,
+         system_instruction=system_prompt,
+         safety_settings=safety_settings,
+         generation_config=generation_config,
+     )
+     parts = [user_text.strip() if user_text else ""]
+     if image is not None:
+         # Gemini supports direct PIL images as parts
+         parts.append(image)
+
+     resp = model_obj.generate_content(parts)
+     # resp.text raises ValueError when no valid part came back (e.g., a blocked
+     # response), so guard it rather than relying on hasattr
+     try:
+         if resp.text:
+             return resp.text
+     except ValueError:
+         pass
+     # Fallback for the candidates API shape
+     if getattr(resp, "candidates", None):
+         return resp.candidates[0].content.parts[0].text
+     return "(No response text returned.)"
+
+ # ---- Guardrails ----
+ BLOCKLIST = ["{{", "}}", "<script", "</script>"]
+
+ def blocked(text: str) -> bool:
+     if not text:
+         return False
+     low = text.lower()
+     return any(tok in low for tok in BLOCKLIST)
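+ # Quick sanity check against the list above (illustrative):
+ # blocked("run {{payload}}") -> True; blocked("plain question") -> False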
+
+ # ---- Orchestrator ----
+ def infer(
+     provider,          # "OpenAI (GPT-5)" or "Google (Nano-Banana)"
+     openai_api_key,
+     google_api_key,
+     model_name,
+     system_prompt,
+     user_message,
+     image,
+     temperature,
+     max_tokens,
+     history,
+ ):
+     history = history or []
+     t0 = time.time()
+
+     if provider.startswith("OpenAI"):
+         api_key = openai_api_key or os.getenv("OPENAI_API_KEY", "")
+         if not api_key:
+             raise gr.Error("Add your OpenAI API key in Settings.")
+         # Build the OpenAI messages array (the image is ignored on this path to
+         # avoid SDK variance); history arrives as role/content dicts because the
+         # Chatbot below uses type="messages"
+         messages = [{"role": "system", "content": system_prompt.strip() or SYSTEM_DEFAULT}]
+         for h in history:
+             messages.append({"role": h["role"], "content": h["content"]})
+         messages.append({"role": "user", "content": user_message})
+
+         if blocked(user_message):
+             assistant = "Request blocked by safety policy. Please rephrase."
+         else:
+             assistant = call_openai(api_key, model_name.strip() or "gpt-5", messages, temperature, max_tokens)
+
+     else:
+         api_key = google_api_key or os.getenv("GOOGLE_API_KEY", "") or os.getenv("GEMINI_API_KEY", "")
+         if not api_key:
+             raise gr.Error("Add your Google (Gemini) API key in Settings.")
+         # Gemini supports multimodal messages; we pass the current prompt + optional image
+         if blocked(user_message):
+             assistant = "Request blocked by safety policy. Please rephrase."
+         else:
+             assistant = call_gemini(
+                 api_key=api_key,
+                 model=model_name.strip() or "gemini-2.5-nano-banana",
+                 user_text=user_message,
+                 system_prompt=(system_prompt.strip() or SYSTEM_DEFAULT),
+                 image=image,
+                 temperature=temperature,
+             )
+
+     latency_ms = int((time.time() - t0) * 1000)
+     cost_est = estimate_cost(provider, model_name, user_message, assistant)
+
+     # Append in messages format so the type="messages" Chatbot renders correctly
+     history.append({"role": "user", "content": user_message})
+     history.append({"role": "assistant", "content": assistant})
+     return history, latency_ms, cost_est
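+ # After one turn, the returned history reads (illustrative):
+ # [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]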
+
+ # Cheap & cheerful cost estimate stub (tokens ≈ chars/4)
+ def estimate_cost(provider, model, prompt, reply):
+     prompt_toks = int(len(prompt or "") / 4)
+     reply_toks = int(len(reply or "") / 4)
+     total = prompt_toks + reply_toks
+
+     # You can tune these easily
+     if provider.startswith("OpenAI"):
+         # Example: $3 / 1M input + $12 / 1M output → blended rough rate
+         dollars = total / 1_000_000.0 * 7.5
+     else:
+         # Example Gemini/Nano-Banana nominal placeholder
+         dollars = total / 1_000_000.0 * 5.0
+     return round(dollars, 4)
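+ # Worked example: a 2,000-char prompt plus a 2,000-char reply ≈ 500 + 500 = 1,000 tokens;
+ # on the OpenAI branch that is 1_000 / 1_000_000 * 7.5 ≈ $0.0075.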
+
+ # ---- UI ----
+ with gr.Blocks(fill_height=True, theme=gr.themes.Soft()) as demo:
+     gr.Markdown(f"# {APP_TITLE}")
+
+     with gr.Row():
+         with gr.Column(scale=3):
+             provider = gr.Radio(
+                 ["OpenAI (GPT-5)", "Google (Nano-Banana)"],
+                 value="OpenAI (GPT-5)",
+                 label="Engine",
+             )
+             model_name = gr.Textbox(
+                 label="Model name",
+                 value="gpt-5",
+                 placeholder="e.g., gpt-5 • gemini-2.5-nano-banana",
+             )
+             system_prompt = gr.Textbox(label="System prompt", value=SYSTEM_DEFAULT, lines=3)
+
+             user_message = gr.Textbox(label="Your message", placeholder="Ask anything…", lines=4)
+             image = gr.Image(label="Optional image (Gemini path supports vision)", type="pil")
+
+             with gr.Row():
+                 temperature = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Temperature")
+                 max_tokens = gr.Slider(128, 4096, value=1024, step=64, label="Max tokens")
+
+             send = gr.Button("Send", variant="primary")
+
+         with gr.Column(scale=4):
+             chat = gr.Chatbot(label="Conversation", height=420, type="messages")
+             with gr.Row():
+                 latency = gr.Number(label="Latency (ms)", interactive=False)
+                 cost = gr.Number(label="Est. cost (USD)", interactive=False)
+
+     with gr.Accordion("Settings • Bring Your Own Keys", open=False):
+         openai_api_key = gr.Textbox(
+             label="OPENAI_API_KEY", type="password",
+             placeholder="sk-... (kept in session; not saved)",
+         )
+         google_api_key = gr.Textbox(
+             label="GOOGLE_API_KEY (Gemini)", type="password",
+             placeholder="AIza... (kept in session; not saved)",
+         )
+         gr.Markdown(
+             "You can also set environment variables `OPENAI_API_KEY` and `GOOGLE_API_KEY` "
+             "in the Space Secrets for a smoother experience."
+         )
+
+     def _on_send(provider, openai_key, google_key, model, sys, msg, img, temp, maxtok, hist):
+         if not (msg and msg.strip()):
+             raise gr.Error("Type a message first.")
+         return infer(provider, openai_key, google_key, model, sys, msg, img, temp, int(maxtok), hist)
+
+     send.click(
+         _on_send,
+         inputs=[provider, openai_api_key, google_api_key, model_name, system_prompt, user_message, image, temperature, max_tokens, chat],
+         outputs=[chat, latency, cost],
+         show_progress="minimal",
+     )
+
+ if __name__ == "__main__":
+     # queue() avoids race conditions under load on Spaces
+     demo.queue(max_size=64).launch()
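
To try this outside the Space, a minimal local run, assuming the dependency names implied by the imports above (gradio, python-dotenv, openai, google-generativeai):

  pip install gradio python-dotenv openai google-generativeai
  python app.py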