""" ZEN Dual-Engine AI โ€” GPT-5 (OpenAI) + Nano-Banana (Gemini) Gradio 5.49.1 Space with in-UI API keys, chat history, optional image (Gemini), telemetry, starter prompts, and robust OpenAI param fallbacks. Key robustness: - Auto-retry OpenAI call with `max_completion_tokens` if model rejects `max_tokens`. - Auto-retry OpenAI call without `temperature` if model only allows the default. - Clean Gradio 5.49.1 queue usage (no deprecated args). """ import os import time import base64 from io import BytesIO from typing import List, Tuple, Dict, Any import gradio as gr # ----------------------------- # Constants & Defaults # ----------------------------- APP_TITLE = "๐Ÿ”ฎ ZEN Dual-Engine AI โ€” GPT-5 + Nano-Banana" DEFAULT_OPENAI_MODEL = "gpt-5" # Adjust if your account uses a different label DEFAULT_GEMINI_MODEL = "gemini-2.5-nano-banana" SYSTEM_DEFAULT = ( "You are ZEN Assistant. Respond concisely, accurately, and helpfully. " "If an image is provided, analyze it clearly. Avoid unsafe advice." ) STARTER_PROMPTS: List[str] = [ "๐Ÿ’ก Brainstorm 7 AI-powered product ideas that tackle youth education gaps.", "๐Ÿ“š Draft a 4-week AI literacy micro-curriculum with hands-on labs.", "๐Ÿงช Design an experiment to compare GPT-5 vs Nano-Banana on code generation.", "๐ŸŽจ Describe a museum exhibit that visualizes the history of AI in America.", "๐Ÿ› ๏ธ Generate a Python function that converts a PDF to clean Markdown.", "๐Ÿช Write a sci-fi scene about a student building an AI on the Moon.", "๐Ÿ” Summarize the pros/cons of agentic workflows for startups.", "๐Ÿ“ˆ Propose a metrics dashboard for measuring AI program impact.", ] # Very light guardrail against trivial injection/script pastes BLOCKLIST = ["", "{{", "}}"] # ----------------------------- # Lazy Imports (boot even if SDKs are missing) # ----------------------------- def _lazy_import_openai(): try: from openai import OpenAI # openai>=1.0 interface return OpenAI except Exception as e: raise RuntimeError(f"OpenAI SDK not available: {e}") def _lazy_import_gemini(): try: import google.generativeai as genai return genai except Exception as e: raise RuntimeError(f"Google Generative AI SDK not available: {e}") # ----------------------------- # Utilities # ----------------------------- def is_blocked(text: str) -> bool: if not text: return False low = text.lower() return any(tok in low for tok in BLOCKLIST) def pil_to_base64(image) -> str: """Convert PIL image to base64 JPEG (if you ever need raw bytes).""" buffer = BytesIO() image.convert("RGB").save(buffer, format="JPEG", quality=92) return base64.b64encode(buffer.getvalue()).decode("utf-8") def approx_tokens_from_chars(text: str) -> int: return int(len(text or "") / 4) def estimate_cost(provider_label: str, model: str, prompt: str, reply: str) -> float: """ Super rough illustrative CPMs. Adjust to your billing reality. """ toks = approx_tokens_from_chars(prompt) + approx_tokens_from_chars(reply) if provider_label.startswith("OpenAI"): return round(toks / 1_000_000.0 * 7.5, 4) # illustrative return round(toks / 1_000_000.0 * 5.0, 4) # illustrative # ----------------------------- # Providers # ----------------------------- def call_openai_chat( api_key: str, model: str, system_prompt: str, history_messages: List[Dict[str, str]], user_message: str, temperature: float, max_tokens: int, ) -> str: """ Calls OpenAI Chat Completions with adaptive parameter handling: - If model rejects `max_tokens`, retry with `max_completion_tokens`. - If model rejects non-default `temperature`, retry omitting temperature (server default). """ OpenAI = _lazy_import_openai() client = OpenAI(api_key=api_key) messages = [{"role": "system", "content": (system_prompt.strip() or SYSTEM_DEFAULT)}] messages.extend(history_messages or []) messages.append({"role": "user", "content": user_message}) base_kwargs = dict( model=(model.strip() or DEFAULT_OPENAI_MODEL), messages=messages, ) # Try #1: legacy `max_tokens` + provided temperature try: kwargs_try = dict(base_kwargs) kwargs_try["temperature"] = float(temperature) kwargs_try["max_tokens"] = int(max_tokens) resp = client.chat.completions.create(**kwargs_try) return resp.choices[0].message.content except Exception as e1: msg1 = str(e1) # If model wants `max_completion_tokens` needs_mct = ("max_tokens" in msg1 and "max_completion_tokens" in msg1) or "Unsupported parameter" in msg1 # If model wants default temperature only temp_default_only = ("temperature" in msg1) and ("unsupported_value" in msg1 or "Only the default" in msg1) # Path A: fix tokens first, keep temperature if needs_mct and not temp_default_only: try: kwargs_try = dict(base_kwargs) kwargs_try["temperature"] = float(temperature) kwargs_try["max_completion_tokens"] = int(max_tokens) resp = client.chat.completions.create(**kwargs_try) return resp.choices[0].message.content except Exception as e2: msg2 = str(e2) # If that new attempt also complains about temperature, handle below temp_default_only = ("temperature" in msg2) and ("unsupported_value" in msg2 or "Only the default" in msg2) # Path B: fix temperature only (omit it), keep legacy tokens if temp_default_only and not needs_mct: try: kwargs_try = dict(base_kwargs) kwargs_try["max_tokens"] = int(max_tokens) resp = client.chat.completions.create(**kwargs_try) return resp.choices[0].message.content except Exception as e3: msg3 = str(e3) # If now it also wants max_completion_tokens, do both needs_mct = ("max_tokens" in msg3 and "max_completion_tokens" in msg3) or "Unsupported parameter" in msg3 # Path C: needs both fixes (no temperature + max_completion_tokens) if needs_mct and temp_default_only: kwargs_try = dict(base_kwargs) kwargs_try["max_completion_tokens"] = int(max_tokens) resp = client.chat.completions.create(**kwargs_try) # omit temperature return resp.choices[0].message.content # If none matched, re-raise original error raise def call_gemini_generate( api_key: str, model: str, system_prompt: str, user_message: str, image=None, temperature: float = 0.4, ) -> str: """ Calls Gemini (including Nano-Banana variants). Supports optional PIL image. """ genai = _lazy_import_gemini() genai.configure(api_key=api_key) # relaxed demo thresholds; adjust per policy as needed safety_settings = [ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"}, {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}, {"category": "HARM_CATEGORY_SEXUAL_CONTENT", "threshold": "BLOCK_NONE"}, {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}, ] model_obj = genai.GenerativeModel( model_name=(model.strip() or DEFAULT_GEMINI_MODEL), system_instruction=(system_prompt.strip() or SYSTEM_DEFAULT), safety_settings=safety_settings, generation_config={"temperature": float(temperature)}, ) parts: List[Any] = [user_message or ""] if image is not None: parts.append(image) # PIL image supported directly resp = model_obj.generate_content(parts) if hasattr(resp, "text") and resp.text: return resp.text cand = getattr(resp, "candidates", None) if cand and getattr(cand[0], "content", None): parts = getattr(cand[0].content, "parts", None) if parts and hasattr(parts[0], "text"): return parts[0].text return "(No response text returned.)" # ----------------------------- # Orchestration # ----------------------------- def to_openai_history(gradio_history: List[Tuple[str, str]]) -> List[Dict[str, str]]: """ Convert Gradio Chatbot history ([(user, assistant), ...]) to OpenAI role format. """ oai: List[Dict[str, str]] = [] for user_msg, ai_msg in gradio_history or []: if user_msg: oai.append({"role": "user", "content": user_msg}) if ai_msg: oai.append({"role": "assistant", "content": ai_msg}) return oai def infer( provider_label: str, openai_key: str, google_key: str, model_name: str, system_prompt: str, user_message: str, image, temperature: float, max_tokens: int, history: List[Tuple[str, str]], ): """ Main inference entry: routes to OpenAI or Gemini, measures latency, estimates cost, and appends the turn to the chat history. """ if not (user_message and user_message.strip()): raise gr.Error("Please enter a prompt (or pick a starter prompt).") if is_blocked(user_message): assistant = "Request blocked by safety policy. Please rephrase." history = history or [] history.append((user_message, assistant)) return history, 0, 0.0 t0 = time.time() history = history or [] if provider_label.startswith("OpenAI"): api_key = (openai_key or "").strip() if not api_key: raise gr.Error("Enter your OpenAI API key in the Settings accordion.") oai_history = to_openai_history(history) reply = call_openai_chat( api_key=api_key, model=model_name or DEFAULT_OPENAI_MODEL, system_prompt=system_prompt or SYSTEM_DEFAULT, history_messages=oai_history, user_message=user_message, temperature=temperature, max_tokens=max_tokens, ) else: api_key = (google_key or "").strip() if not api_key: raise gr.Error("Enter your Google Gemini API key in the Settings accordion.") reply = call_gemini_generate( api_key=api_key, model=model_name or DEFAULT_GEMINI_MODEL, system_prompt=system_prompt or SYSTEM_DEFAULT, user_message=user_message, image=image, temperature=temperature, ) latency_ms = int((time.time() - t0) * 1000) cost_est = estimate_cost(provider_label, model_name, user_message, reply) history.append((user_message, reply)) return history, latency_ms, cost_est # ----------------------------- # UI # ----------------------------- with gr.Blocks(fill_height=True, theme=gr.themes.Soft(), title="ZEN Dual-Engine AI") as demo: gr.Markdown("# " + APP_TITLE) gr.Markdown( "Pick your engine, paste your API key(s), and start creating. " "Keys entered here are **session-only**. For permanent use, set **Space Secrets**." ) with gr.Row(): with gr.Column(scale=3, min_width=380): provider = gr.Radio( ["OpenAI (GPT-5)", "Google (Nano-Banana)"], value="OpenAI (GPT-5)", label="Engine" ) model_name = gr.Textbox( label="Model name", value=DEFAULT_OPENAI_MODEL, placeholder=f"e.g., {DEFAULT_OPENAI_MODEL} or {DEFAULT_GEMINI_MODEL}" ) system_prompt = gr.Textbox( label="System prompt", value=SYSTEM_DEFAULT, lines=3, info="Controls assistant behavior/persona." ) with gr.Accordion("๐Ÿ”‘ Settings โ€ข Bring Your Own Keys (session-only)", open=True): openai_api_key = gr.Textbox( label="OPENAI_API_KEY (for GPT-5 path)", type="password", placeholder="sk-..." ) google_api_key = gr.Textbox( label="GOOGLE_API_KEY (for Nano-Banana path)", type="password", placeholder="AIza..." ) gr.Markdown( "You can also add `OPENAI_API_KEY` and `GOOGLE_API_KEY` in the Space **Repository Secrets**." ) user_message = gr.Textbox( label="Your prompt", placeholder="Ask anythingโ€ฆ or pick a starter prompt below.", lines=5 ) with gr.Row(): temperature = gr.Slider( 0.0, 1.0, value=0.5, step=0.05, label="Temperature (some OpenAI models ignore non-default)" ) max_tokens = gr.Slider( 128, 4096, value=1024, step=64, label="Max completion tokens (OpenAI path)" ) with gr.Row(): send = gr.Button("๐Ÿš€ Generate", variant="primary") clear = gr.Button("๐Ÿงน Clear chat") with gr.Column(scale=4, min_width=480): chat = gr.Chatbot( label="Conversation", height=440, type="messages", # OpenAI-style roles internally avatar_images=(None, None), ) with gr.Row(): latency = gr.Number(label="Latency (ms)", interactive=False) cost = gr.Number(label="Est. cost (USD)", interactive=False) with gr.Accordion("๐Ÿ–ผ๏ธ Optional: Image (Gemini path supports vision)", open=False): image = gr.Image( label="Upload image for analysis (used only on Google/Gemini path)", type="pil" ) with gr.Accordion("โœจ Starter Prompts", open=True): starters = gr.Dataset( components=[gr.Textbox(visible=False)], samples=[[p] for p in STARTER_PROMPTS], type="index", label="Click a row to load a starter prompt into the input." ) gr.Markdown( "- Try the same prompt on both engines and compare.\n" "- Safety: blocks obvious injection/script patterns." ) # ------------------------- # Events # ------------------------- def on_starter_select(evt: gr.SelectData): idx = evt.index if isinstance(idx, (list, tuple)): idx = idx[0] try: return STARTER_PROMPTS[int(idx)] except Exception: return STARTER_PROMPTS[0] starters.select(on_starter_select, outputs=[user_message]) def on_send(provider, oai_key, g_key, model, sys, msg, img, temp, maxtok, hist): return infer(provider, oai_key, g_key, model, sys, msg, img, float(temp), int(maxtok), hist) send.click( on_send, inputs=[ provider, openai_api_key, google_api_key, model_name, system_prompt, user_message, image, temperature, max_tokens, chat ], outputs=[chat, latency, cost], show_progress="minimal" ) def on_clear(): return [], 0, 0.0, None, "" clear.click(on_clear, outputs=[chat, latency, cost, image, user_message]) # ----------------------------- # Main # ----------------------------- if __name__ == "__main__": demo.queue(max_size=64).launch()