"""ZEN Module 2 Section 2.11 — Omni Studio.

A multi-provider Gradio app for the ZEN Vanguard Program: generates images,
text, and infographic specs via OpenAI (gpt-image-1 / dall-e-3 / gpt-4.1-mini)
or Google (Gemini / Nano-Banana), with preset/style prompt augmentation and
automatic provider fallback for image generation.
"""

import base64
import io
from typing import List, Optional, Tuple

import gradio as gr
from PIL import Image


# -----------------------
# OpenAI + Google helpers
# -----------------------
def _get_openai_client(api_key: str):
    """Build an OpenAI client for the given key."""
    from openai import OpenAI  # local import so app still loads if lib missing

    return OpenAI(api_key=api_key)


def _configure_google(api_key: str):
    """Configure the google.generativeai SDK and return the module."""
    import google.generativeai as genai  # local import so app still loads if lib missing

    genai.configure(api_key=api_key)
    return genai


# -----------------------
# Prompt / preset logic
# -----------------------
def apply_preset_to_prompt(
    base_prompt: str,
    preset: str,
    style: str,
    content_type: str,
) -> str:
    """Augment *base_prompt* with preset, style, and content-type add-ons.

    Returns the combined prompt; falls back to "high quality image" when
    everything is empty.
    """
    base_prompt = base_prompt.strip()

    preset_addons = {
        "None": "",
        "ZEN Glass Dashboard": (
            " ultra-detailed UI, glassmorphism, prismatic alloy panels, "
            "neon cyan and magenta HUD overlays, high-end enterprise dashboard"
        ),
        "Palantir / Anduril Infographic": (
            " dark enterprise command-center aesthetic, clean vector infographics, "
            "military-grade analytics overlays, sharp typography, high contrast, "
            "minimal but dense information layout"
        ),
        "Youth AI Literacy Poster": (
            " vibrant educational poster for teens, clean icons, diverse students, "
            "friendly but serious tone, clear typography, classroom-ready layout"
        ),
        "ZEN AI Arena Card": (
            " holographic trading card style, quantum glass edges, subtle glow, "
            "sharp logo lockup, futuristic typography, dramatic lighting"
        ),
        "Blueprint / Systems Diagram": (
            " technical blueprint, white lines on deep navy background, callout labels, "
            "flow arrows, system nodes, engineering drawing style"
        ),
    }

    style_addons = {
        "Default": "",
        "Photoreal": " hyper-realistic photography, physically based lighting",
        "Illustration": " clean vector illustration style, flat colors, crisp lines",
        "Futuristic UI": " futuristic interface design, HUD, holographic widgets",
        "Blueprint": " blueprint drawing, schematic lines, engineering grid",
        "Cinematic": " cinematic lighting, dramatic composition, filmic contrast",
    }

    if content_type == "Image":
        ct_addon = " high-resolution concept art,"
    elif content_type == "Infographic Spec":
        ct_addon = (
            " detailed infographic design specification, including layout regions, "
            "sections, labels, and visual hierarchy,"
        )
    else:
        ct_addon = ""

    extra = " ".join(
        x
        for x in [
            ct_addon,
            preset_addons.get(preset, ""),
            style_addons.get(style, ""),
        ]
        if x
    )

    if extra:
        if base_prompt:
            return f"{base_prompt}, {extra}"
        else:
            return extra.strip()

    return base_prompt or "high quality image"


# -----------------------
# OpenAI text + images
# -----------------------
def generate_text_openai(
    api_key: str,
    prompt: str,
    mode: str,
) -> str:
    """Generate text (or an infographic spec when mode == 'Infographic Spec')
    using OpenAI chat completions."""
    client = _get_openai_client(api_key)

    system_msg = (
        "You are an expert creator for the ZEN AI ecosystem. "
        "Write clear, concise, high-leverage content. "
        "If mode is 'Infographic Spec', output a structured outline with sections, "
        "titles, short captions, and suggested visual elements."
    )

    if mode == "Infographic Spec":
        user_prompt = (
            "Create a Palantir/Anduril-level infographic specification based on this topic:\n\n"
            f"{prompt}\n\n"
            "Return:\n"
            "1) Title options\n"
            "2) 3–5 main sections\n"
            "3) Bullet points for each section\n"
            "4) Suggested charts/visuals\n"
            "5) Color and typography recommendations."
        )
    else:
        user_prompt = prompt

    resp = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
    )
    return resp.choices[0].message.content


def decode_b64_images(b64_list: List[str]) -> List[Image.Image]:
    """Decode a list of base64-encoded image payloads into RGB PIL images."""
    images: List[Image.Image] = []
    for b64 in b64_list:
        raw = base64.b64decode(b64)
        img = Image.open(io.BytesIO(raw)).convert("RGB")
        images.append(img)
    return images


def generate_image_openai(
    api_key: str,
    model: str,
    prompt: str,
    size: str,
    quality: str,
    n_images: int,
    seed: Optional[int],
) -> List[Image.Image]:
    """Generate images with the OpenAI Images API and return PIL images.

    NOTE: the Images API accepts no ``seed`` parameter for either
    ``gpt-image-1`` or ``dall-e-3``; ``seed`` is kept in the signature for
    interface compatibility but intentionally never sent (previously it was
    forwarded and made every seeded request fail).
    """
    client = _get_openai_client(api_key)

    is_dalle = model.startswith("dall-e")

    # The two model families accept different size vocabularies:
    # dall-e-3 -> 1024x1792 / 1792x1024, gpt-image-1 -> 1024x1536 / 1536x1024.
    if is_dalle:
        size_map = {
            "Square (1024x1024)": "1024x1024",
            "Portrait (1024x1792)": "1024x1792",
            "Landscape (1792x1024)": "1792x1024",
        }
    else:
        size_map = {
            "Square (1024x1024)": "1024x1024",
            "Portrait (1024x1792)": "1024x1536",
            "Landscape (1792x1024)": "1536x1024",
        }
    size_param = size_map.get(size, "1024x1024")

    b64_list: List[str] = []

    if is_dalle:
        # dall-e-3 only supports n=1 per request, quality in {standard, hd},
        # and returns URLs unless response_format="b64_json" is requested.
        dalle_quality = "hd" if quality == "high" else "standard"
        for _ in range(max(1, n_images)):
            resp = client.images.generate(
                model=model,
                prompt=prompt,
                size=size_param,
                n=1,
                quality=dalle_quality,
                response_format="b64_json",
            )
            b64_list.extend(d.b64_json for d in resp.data)
    else:
        kwargs = {
            "model": model,
            "prompt": prompt,
            "size": size_param,
            "n": n_images,
        }
        # Allowed values from API: low, medium, high, auto
        allowed_qualities = {"low", "medium", "high", "auto"}
        if quality in allowed_qualities:
            kwargs["quality"] = quality
        resp = client.images.generate(**kwargs)
        b64_list = [d.b64_json for d in resp.data]

    return decode_b64_images(b64_list)


# -----------------------
# Google (Gemini / Nano-Banana)
# -----------------------
def generate_text_google(
    api_key: str,
    prompt: str,
    mode: str,
    model_name: str = "gemini-1.5-pro",
) -> str:
    """Generate text (or an infographic spec) via a Gemini text model.

    ``model_name`` defaults to the previous hard-coded model so existing
    callers are unaffected; the UI's "Google Text Model Hint" is now honored.
    """
    genai = _configure_google(api_key)
    model = genai.GenerativeModel(model_name)

    if mode == "Infographic Spec":
        content = (
            "You are an expert enterprise communicator. "
            "Create a Palantir/Anduril-grade infographic spec.\n\n"
            f"Topic / prompt:\n{prompt}\n\n"
            "Return:\n"
            "1) Title options\n"
            "2) Main sections with bullet points\n"
            "3) Visual layout ideas\n"
            "4) Chart/visualization suggestions\n"
            "5) Palette & typography notes."
        )
    else:
        content = prompt

    resp = model.generate_content(content)
    return resp.text


def generate_image_google(
    api_key: str,
    google_image_model: str,
    prompt: str,
    n_images: int,
    seed: Optional[int],
) -> List[Image.Image]:
    """
    Uses a Google / Gemini image-capable model that returns inline image bytes.
    If your Nano-Banana model behaves differently, adjust this function.
    """
    genai = _configure_google(api_key)
    model = genai.GenerativeModel(google_image_model)

    images: List[Image.Image] = []

    for i in range(n_images):
        generation_config = {}
        if seed is not None:
            # Offset the seed per image so each request can vary.
            # NOTE(review): not all generation_config versions accept "seed";
            # confirm against the installed google-generativeai release.
            generation_config["seed"] = seed + i

        resp = model.generate_content(
            prompt,
            generation_config=generation_config or None,
        )

        candidates = getattr(resp, "candidates", []) or []
        for cand in candidates:
            content = getattr(cand, "content", None)
            if not content:
                continue
            parts = getattr(content, "parts", []) or []
            for part in parts:
                inline = getattr(part, "inline_data", None)
                if inline and getattr(inline, "data", None):
                    try:
                        raw = base64.b64decode(inline.data)
                        img = Image.open(io.BytesIO(raw)).convert("RGB")
                        images.append(img)
                    except Exception:
                        # Best-effort decode: skip malformed parts rather
                        # than failing the whole batch.
                        continue

    return images


# -----------------------
# Core callback with provider fallback
# -----------------------
def run_generation(
    openai_key: str,
    google_key: str,
    task_type: str,
    provider: str,
    base_prompt: str,
    negative_prompt: str,
    preset: str,
    style: str,
    size: str,
    quality: str,
    n_images: int,
    seed: int,
    use_seed: bool,
    google_image_model: str,
    google_text_model_hint: str,
) -> Tuple[str, List[Image.Image], str]:
    """Main Gradio callback: dispatch to the selected provider, falling back
    to the other provider for image generation on failure.

    Returns (text_output, images, debug_log).
    """
    text_output = ""
    images: List[Image.Image] = []
    debug_lines = []

    if not base_prompt.strip():
        return "Please enter a prompt.", [], "No prompt provided."

    content_type = task_type

    full_prompt = apply_preset_to_prompt(
        base_prompt=base_prompt,
        preset=preset,
        style=style,
        content_type=content_type,
    )

    if negative_prompt.strip():
        full_prompt += f". Avoid: {negative_prompt.strip()}"

    debug_lines.append(f"Task: {task_type}")
    debug_lines.append(f"Provider selected: {provider}")
    debug_lines.append(f"Preset: {preset}, Style: {style}")
    debug_lines.append(f"OpenAI size: {size}, quality: {quality}")
    debug_lines.append(f"Google image model: {google_image_model}")
    debug_lines.append(f"Google text model hint: {google_text_model_hint}")
    debug_lines.append(f"Seed enabled: {use_seed}, seed: {seed if use_seed else 'None'}")

    seed_val: Optional[int] = seed if use_seed else None

    try:
        # TEXT / INFOGRAPHIC
        if task_type in ["Text", "Infographic Spec"]:
            if provider == "OpenAI":
                if not openai_key.strip():
                    return "Missing OpenAI API key.", [], "OpenAI key not provided."
                text_output = generate_text_openai(
                    api_key=openai_key.strip(),
                    prompt=full_prompt,
                    mode=task_type,
                )
            else:
                if not google_key.strip():
                    return "Missing Google API key.", [], "Google key not provided."
                text_output = generate_text_google(
                    api_key=google_key.strip(),
                    prompt=full_prompt,
                    mode=task_type,
                    model_name=google_text_model_hint.strip() or "gemini-1.5-pro",
                )

        # IMAGE
        if task_type == "Image":
            primary = provider
            secondary = "OpenAI" if provider.startswith("Google") else "Google"

            # Helper to attempt OpenAI
            def try_openai() -> Tuple[List[Image.Image], str]:
                if not openai_key.strip():
                    raise ValueError("OpenAI key missing for OpenAI image generation.")
                image_model = "gpt-image-1"
                if "Palantir" in preset:
                    image_model = "dall-e-3"
                imgs = generate_image_openai(
                    api_key=openai_key.strip(),
                    model=image_model,
                    prompt=full_prompt,
                    size=size,
                    quality=quality,
                    n_images=n_images,
                    seed=seed_val,
                )
                return imgs, image_model

            # Helper to attempt Google
            def try_google() -> List[Image.Image]:
                if not google_key.strip():
                    raise ValueError("Google key missing for Google image generation.")
                model_id = google_image_model.strip() or "gemini-1.5-flash"
                return generate_image_google(
                    api_key=google_key.strip(),
                    google_image_model=model_id,
                    prompt=full_prompt,
                    n_images=n_images,
                    seed=seed_val,
                )

            image_model_used = None

            try:
                if primary == "OpenAI":
                    images, image_model_used = try_openai()
                else:  # Google primary
                    images = try_google()
            except Exception as e_primary:
                debug_lines.append(f"Primary provider {primary} error: {e_primary}")
                # Fallback if possible
                try:
                    if secondary == "OpenAI":
                        images, image_model_used = try_openai()
                    else:
                        images = try_google()
                    debug_lines.append(f"Fallback provider {secondary} succeeded.")
                except Exception as e_secondary:
                    debug_lines.append(f"Fallback provider {secondary} error: {e_secondary}")
                    raise RuntimeError(
                        f"Both providers failed. Primary: {e_primary} | Secondary: {e_secondary}"
                    )

            if image_model_used:
                debug_lines.append(f"OpenAI image model used: {image_model_used}")

        if not text_output and task_type == "Image":
            text_output = (
                "Image(s) generated. Use Text or Infographic Spec mode to "
                "generate captions, copy, or layout specs."
            )

        if task_type == "Image" and not images:
            debug_lines.append("No images returned from any provider.")

        return text_output, images, "\n".join(debug_lines)

    except Exception as e:
        debug_lines.append(f"Exception: {e}")
        return f"Error during generation: {e}", [], "\n".join(debug_lines)


# -----------------------
# Starter prompts helper
# -----------------------
STARTER_PROMPTS = {
    "None": "",
    "ZEN Glass Arena Card": (
        "ZEN AI Arena holographic credential card showcasing a youth AI pioneer, "
        "glassmorphism border, quantum prism edges, subtle neon glow, "
        "nameplate and role, dark control-room background"
    ),
    "AI Pioneer Infographic": (
        "Infographic showing the AI Pioneer Program journey from idea to deployment, "
        "timeline of modules, icons for coding, Hugging Face Spaces, and blockchain credentials, "
        "Palantir-style layout with three main columns"
    ),
    "Youth AI Literacy Poster": (
        "Poster inviting teens to join the AI Pioneer Program, diverse students, laptops, "
        "cloud-hosted AI agents floating as holograms, bold headline and simple CTA, "
        "modern but serious aesthetic"
    ),
    "Vanguard Systems Blueprint": (
        "Blueprint diagram of the ZEN ecosystem: AI Pioneer Program, ZEN Arena, "
        "blockchain credentials, ZEN dashboards, arrows showing data flow and automations, "
        "technical engineering style"
    ),
    "Instructor Training Card": (
        "Training card for ZEN Vanguard instructors with modules listed, clean UI, "
        "minimal layout, white card on dark background, subtle gradient border, "
        "space for QR code and URL"
    ),
}


def load_starter_prompt(choice: str) -> str:
    """Return the starter prompt text for *choice* ('' when unknown)."""
    return STARTER_PROMPTS.get(choice, "")


def clear_outputs():
    """Reset text output, gallery, and debug log."""
    return "", [], ""


# -----------------------
# UI
# -----------------------
with gr.Blocks() as demo:
    gr.Markdown(
        """
# 🧬 ZEN Module 2 Section 2.11 — Omni Studio

A multi-provider creator used in the **ZEN Vanguard Program**.

- 🔑 Bring your own **OpenAI** and **Google (Gemini / Nano-Banana)** keys
- 🎨 Generate **images** with presets + fine-grained controls
- 🧠 Generate **text** and **infographic specs** for ZEN dashboards, cards, and posters
"""
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🔐 API Keys (local to this session)")
            openai_key = gr.Textbox(
                label="OPENAI_API_KEY",
                type="password",
                placeholder="sk-...",
            )
            google_key = gr.Textbox(
                label="GOOGLE_API_KEY (Gemini / Nano-Banana)",
                type="password",
                placeholder="AIza...",
            )

            gr.Markdown("### 🎯 Task & Provider")
            task_type = gr.Radio(
                ["Image", "Text", "Infographic Spec"],
                value="Image",
                label="Task Type",
            )
            provider = gr.Radio(
                ["OpenAI", "Google (Nano-Banana / Gemini)"],
                value="OpenAI",
                label="Primary Provider",
            )

            with gr.Accordion("Starter Prompts (ZEN Vanguard)", open=False):
                starter_choice = gr.Dropdown(
                    list(STARTER_PROMPTS.keys()),
                    value="None",
                    label="Choose a starter prompt",
                )
                load_prompt_btn = gr.Button("Load Starter Prompt")
                gr.Markdown(
                    """
Use starter prompts to quickly explore:

- **ZEN Glass Arena Card** — holographic card-style image
- **AI Pioneer Infographic** — program journey and outcomes
- **Youth AI Literacy Poster** — outreach poster for teens
- **Vanguard Systems Blueprint** — systems-thinking diagram
- **Instructor Training Card** — card UI for trainers
"""
                )

            base_prompt = gr.Textbox(
                label="Main Prompt",
                lines=5,
                placeholder="Describe the ZEN image, text, or infographic you want.",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt (optional)",
                lines=2,
                placeholder="Things to avoid: low-res, clutter, warped text, etc.",
            )

            with gr.Row():
                preset = gr.Dropdown(
                    [
                        "None",
                        "ZEN Glass Dashboard",
                        "Palantir / Anduril Infographic",
                        "Youth AI Literacy Poster",
                        "ZEN AI Arena Card",
                        "Blueprint / Systems Diagram",
                    ],
                    value="ZEN Glass Dashboard",
                    label="Visual Preset",
                )
                style = gr.Dropdown(
                    [
                        "Default",
                        "Photoreal",
                        "Illustration",
                        "Futuristic UI",
                        "Blueprint",
                        "Cinematic",
                    ],
                    value="Futuristic UI",
                    label="Style Accent",
                )

            gr.Markdown("### 🎛 OpenAI Image Controls")
            with gr.Row():
                size = gr.Dropdown(
                    [
                        "Square (1024x1024)",
                        "Portrait (1024x1792)",
                        "Landscape (1792x1024)",
                    ],
                    value="Square (1024x1024)",
                    label="Aspect Ratio / Size",
                )
                quality = gr.Dropdown(
                    ["auto", "low", "medium", "high"],
                    value="high",
                    label="Quality (OpenAI)",
                )

            n_images = gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                step=1,
                label="Number of Images",
            )

            with gr.Row():
                use_seed = gr.Checkbox(
                    value=False,
                    label="Lock Seed (repeatable outputs)",
                )
                seed = gr.Slider(
                    minimum=1,
                    maximum=2**31 - 1,
                    value=12345,
                    step=1,
                    label="Seed",
                )

            gr.Markdown("### 🧪 Google Image / Text Model Hints")
            google_image_model = gr.Textbox(
                label="Google Image Model (default: gemini-1.5-flash)",
                value="gemini-1.5-flash",
                placeholder="e.g. your Nano-Banana model id or another image-capable model",
            )
            google_text_model_hint = gr.Textbox(
                label="Google Text Model Hint",
                value="gemini-1.5-pro",
                placeholder="Used internally as default text model.",
            )

            with gr.Row():
                generate_btn = gr.Button("🚀 Generate", variant="primary")
                clear_btn = gr.Button("Clear Outputs")

        with gr.Column():
            gr.Markdown("### 📜 Text / Spec Output")
            text_output = gr.Markdown()

            gr.Markdown("### 🖼 Image Output")
            image_gallery = gr.Gallery(
                show_label=False,
                columns=2,
                height=500,
            )

            gr.Markdown("### 🧾 Debug / Logs")
            debug_output = gr.Textbox(
                label="Debug Info",
                lines=12,
            )

    # Wire up callbacks
    generate_btn.click(
        fn=run_generation,
        inputs=[
            openai_key,
            google_key,
            task_type,
            provider,
            base_prompt,
            negative_prompt,
            preset,
            style,
            size,
            quality,
            n_images,
            seed,
            use_seed,
            google_image_model,
            google_text_model_hint,
        ],
        outputs=[text_output, image_gallery, debug_output],
    )

    load_prompt_btn.click(
        fn=load_starter_prompt,
        inputs=[starter_choice],
        outputs=[base_prompt],
    )

    clear_btn.click(
        fn=clear_outputs,
        inputs=[],
        outputs=[text_output, image_gallery, debug_output],
    )


if __name__ == "__main__":
    demo.launch()