# Spaces: ZENLLC / Module2Space2 (Sleeping)
# File size: 17,057 Bytes

import base64
import io
from typing import List, Tuple, Optional

import gradio as gr
from PIL import Image


# -----------------------
# OpenAI + Google helpers
# -----------------------

def _get_openai_client(api_key: str):
    """Build an OpenAI SDK client authenticated with ``api_key``."""
    # Imported lazily so the app can still start when the openai package
    # is not installed; the failure only surfaces when OpenAI is used.
    import openai

    return openai.OpenAI(api_key=api_key)


def _configure_google(api_key: str):
    """Configure the ``google.generativeai`` module with ``api_key`` and return it."""
    # Imported lazily, mirroring the OpenAI helper, so a missing package
    # does not prevent the module from loading.
    from google import generativeai

    generativeai.configure(api_key=api_key)
    return generativeai


# -----------------------
# Prompt / preset logic
# -----------------------

def apply_preset_to_prompt(
    base_prompt: str,
    preset: str,
    style: str,
    content_type: str,
) -> str:
    """Append content-type, preset, and style descriptors to a user prompt.

    Args:
        base_prompt: The user's own prompt text; surrounding whitespace is
            stripped and ``None`` is treated as empty.
        preset: Name of a visual preset; unknown names add nothing.
        style: Name of a style accent; unknown names add nothing.
        content_type: ``"Image"`` and ``"Infographic Spec"`` add a
            descriptor; any other value adds nothing.

    Returns:
        The prompt followed by the non-empty descriptors, all separated by
        ``", "``. If the prompt is empty, only the descriptors are returned;
        if there are no descriptors either, ``"high quality image"``.
    """
    base_prompt = (base_prompt or "").strip()

    # Fragments are stored without leading spaces or trailing commas so that
    # joining them never produces doubled spaces or a dangling comma.
    preset_addons = {
        "None": "",
        "ZEN Glass Dashboard": (
            "ultra-detailed UI, glassmorphism, prismatic alloy panels, "
            "neon cyan and magenta HUD overlays, high-end enterprise dashboard"
        ),
        "Palantir / Anduril Infographic": (
            "dark enterprise command-center aesthetic, clean vector infographics, "
            "military-grade analytics overlays, sharp typography, high contrast, "
            "minimal but dense information layout"
        ),
        "Youth AI Literacy Poster": (
            "vibrant educational poster for teens, clean icons, diverse students, "
            "friendly but serious tone, clear typography, classroom-ready layout"
        ),
        "ZEN AI Arena Card": (
            "holographic trading card style, quantum glass edges, subtle glow, "
            "sharp logo lockup, futuristic typography, dramatic lighting"
        ),
        "Blueprint / Systems Diagram": (
            "technical blueprint, white lines on deep navy background, callout labels, "
            "flow arrows, system nodes, engineering drawing style"
        ),
    }

    style_addons = {
        "Default": "",
        "Photoreal": "hyper-realistic photography, physically based lighting",
        "Illustration": "clean vector illustration style, flat colors, crisp lines",
        "Futuristic UI": "futuristic interface design, HUD, holographic widgets",
        "Blueprint": "blueprint drawing, schematic lines, engineering grid",
        "Cinematic": "cinematic lighting, dramatic composition, filmic contrast",
    }

    content_type_addons = {
        "Image": "high-resolution concept art",
        "Infographic Spec": (
            "detailed infographic design specification, including layout regions, "
            "sections, labels, and visual hierarchy"
        ),
    }

    fragments = (
        content_type_addons.get(content_type, ""),
        preset_addons.get(preset, ""),
        style_addons.get(style, ""),
    )
    extra = ", ".join(fragment for fragment in fragments if fragment)

    if extra:
        return f"{base_prompt}, {extra}" if base_prompt else extra

    return base_prompt or "high quality image"


# -----------------------
# OpenAI text + images
# -----------------------

def generate_text_openai(
    api_key: str,
    prompt: str,
    mode: str,
) -> str:
    """Generate text with the OpenAI chat completions endpoint.

    Args:
        api_key: OpenAI API key used to build the client.
        prompt: Topic or prompt text supplied by the caller.
        mode: When equal to ``"Infographic Spec"`` the prompt is wrapped in
            an infographic-specification request; otherwise it is sent as-is.

    Returns:
        The content of the first choice, or ``""`` when the response carries
        no text content.
    """
    client = _get_openai_client(api_key)

    system_msg = (
        "You are an expert creator for the ZEN AI ecosystem. "
        "Write clear, concise, high-leverage content. "
        "If mode is 'Infographic Spec', output a structured outline with sections, "
        "titles, short captions, and suggested visual elements."
    )

    if mode == "Infographic Spec":
        user_prompt = (
            "Create a Palantir/Anduril-level infographic specification based on this topic:\n\n"
            f"{prompt}\n\n"
            "Return:\n"
            "1) Title options\n"
            "2) 3–5 main sections\n"
            "3) Bullet points for each section\n"
            "4) Suggested charts/visuals\n"
            "5) Color and typography recommendations."
        )
    else:
        user_prompt = prompt

    resp = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
    )
    # ``message.content`` is optional in the SDK's response model; coerce a
    # missing value to an empty string so the declared ``str`` return holds.
    return resp.choices[0].message.content or ""


def decode_b64_images(b64_list: List[str]) -> List[Image.Image]:
    """Decode base64-encoded image payloads into RGB PIL images, in input order."""
    return [
        Image.open(io.BytesIO(base64.b64decode(encoded))).convert("RGB")
        for encoded in b64_list
    ]


def generate_image_openai(
    api_key: str,
    model: str,
    prompt: str,
    size: str,
    quality: str,
    n_images: int,
    seed: Optional[int],
) -> List[Image.Image]:
    """Generate images with the OpenAI Images API and return them as PIL images.

    The request is adapted to the chosen model, following the OpenAI Images
    API reference:

    * ``gpt-image-*`` models accept sizes 1024x1024 / 1024x1536 / 1536x1024
      and qualities low / medium / high / auto, and always return base64.
    * ``dall-e-3`` accepts sizes 1024x1024 / 1024x1792 / 1792x1024 and
      qualities standard / hd, allows only ``n=1`` per request, and returns
      URLs unless ``response_format="b64_json"`` is requested.

    Args:
        api_key: OpenAI API key used to build the client.
        model: Image model id (the caller uses ``gpt-image-1`` or ``dall-e-3``).
        prompt: Full text prompt.
        size: UI size label; unknown labels fall back to ``1024x1024``.
        quality: UI quality value (``"standard"`` or ``"hd"``); translated
            for ``gpt-image-*`` models, passed through otherwise.
        n_images: Number of images wanted (values below 1 are treated as 1).
        seed: Accepted for interface compatibility but not sent: the Images
            API has no seed parameter, and forwarding it makes the SDK call
            fail with an unexpected-keyword error.

    Returns:
        Decoded RGB images, in the order the API returned them.
    """
    client = _get_openai_client(api_key)

    is_gpt_image = model.startswith("gpt-image")
    n_images = max(1, int(n_images))

    if is_gpt_image:
        # Closest supported equivalents of the DALL·E-style UI choices.
        size_map = {
            "Square (1024x1024)": "1024x1024",
            "Portrait (1024x1792)": "1024x1536",
            "Landscape (1792x1024)": "1536x1024",
        }
        quality_map = {"standard": "medium", "hd": "high"}
    else:
        size_map = {
            "Square (1024x1024)": "1024x1024",
            "Portrait (1024x1792)": "1024x1792",
            "Landscape (1792x1024)": "1792x1024",
        }
        quality_map = {}

    kwargs = {
        "model": model,
        "prompt": prompt,
        "size": size_map.get(size, "1024x1024"),
        "quality": quality_map.get(quality, quality),
    }
    if not is_gpt_image:
        # DALL·E models default to URL responses; ask for inline base64 so
        # the payload can be decoded without a second download.
        kwargs["response_format"] = "b64_json"

    # dall-e-3 rejects n > 1, so issue one request per image for that model.
    batch_sizes = [1] * n_images if model == "dall-e-3" else [n_images]

    b64_list: List[str] = []
    for batch_size in batch_sizes:
        resp = client.images.generate(n=batch_size, **kwargs)
        b64_list.extend(d.b64_json for d in (resp.data or []) if d.b64_json)

    return decode_b64_images(b64_list)


# -----------------------
# Google (Gemini / Nano-Banana)
# -----------------------

def generate_text_google(
    api_key: str,
    prompt: str,
    mode: str,
    model_name: str = "gemini-1.5-pro",
) -> str:
    """Generate text with a Google Gemini model.

    Args:
        api_key: Google API key passed to ``genai.configure``.
        prompt: Topic or prompt text supplied by the caller.
        mode: When equal to ``"Infographic Spec"`` the prompt is wrapped in
            an infographic-specification request; otherwise it is sent as-is.
        model_name: Gemini model id to use. Defaults to ``"gemini-1.5-pro"``,
            the value that was previously hard-coded; an empty value falls
            back to the same default.

    Returns:
        The ``text`` attribute of the model response.
    """
    genai = _configure_google(api_key)
    model = genai.GenerativeModel(model_name or "gemini-1.5-pro")

    if mode == "Infographic Spec":
        content = (
            "You are an expert enterprise communicator. "
            "Create a Palantir/Anduril-grade infographic spec.\n\n"
            f"Topic / prompt:\n{prompt}\n\n"
            "Return:\n"
            "1) Title options\n"
            "2) Main sections with bullet points\n"
            "3) Visual layout ideas\n"
            "4) Chart/visualization suggestions\n"
            "5) Palette & typography notes."
        )
    else:
        content = prompt

    resp = model.generate_content(content)
    # NOTE(review): the SDK's ``text`` accessor is understood to raise when the
    # response has no text part (e.g. a blocked prompt) — confirm; the error
    # is left to propagate to the caller.
    return resp.text


def generate_image_google(
    api_key: str,
    google_image_model: str,
    prompt: str,
    n_images: int,
    seed: Optional[int],
) -> List[Image.Image]:
    """Generate images with a Google generative model.

    One ``generate_content`` request is made per requested image. When a seed
    is given, request ``i`` uses ``seed + i``.

    This assumes the Nano-Banana / Nano-Banana-Pro image model in Google AI
    Studio returns inline image data in the response parts. Adjust parsing
    if your model behaves differently.

    Args:
        api_key: Google API key passed to ``genai.configure``.
        google_image_model: Model id handed to ``genai.GenerativeModel``.
        prompt: Full text prompt.
        n_images: Number of requests to issue.
        seed: Base seed, or ``None`` to send no generation config.

    Returns:
        Every inline image found in the responses, converted to RGB. May be
        empty if the model returned no inline image parts.
    """
    genai = _configure_google(api_key)
    model = genai.GenerativeModel(google_image_model)

    images: List[Image.Image] = []

    for i in range(int(n_images)):
        generation_config = {"seed": seed + i} if seed is not None else None
        resp = model.generate_content(prompt, generation_config=generation_config)

        # Candidates without content (or content without parts) are skipped
        # rather than raising AttributeError.
        for cand in getattr(resp, "candidates", None) or []:
            content = getattr(cand, "content", None)
            for part in getattr(content, "parts", None) or []:
                inline = getattr(part, "inline_data", None)
                data = getattr(inline, "data", None)
                if not data:
                    continue

                # The SDK exposes inline data as raw bytes; base64 text is
                # also accepted in case a model/transport returns it encoded.
                raw = base64.b64decode(data) if isinstance(data, str) else bytes(data)
                try:
                    img = Image.open(io.BytesIO(raw))
                except OSError:
                    # Not a recognizable image: treat the bytes as base64 text.
                    img = Image.open(io.BytesIO(base64.b64decode(raw)))
                images.append(img.convert("RGB"))

    return images


# -----------------------
# Core callback
# -----------------------

def run_generation(
    openai_key: str,
    google_key: str,
    task_type: str,
    provider: str,
    base_prompt: str,
    negative_prompt: str,
    preset: str,
    style: str,
    size: str,
    quality: str,
    n_images: int,
    seed: int,
    use_seed: bool,
    google_image_model: str,
    google_text_model_hint: str,  # currently just logged
) -> Tuple[str, List[Image.Image], str]:
    """Gradio callback: build the prompt and dispatch to the chosen provider.

    Args:
        openai_key: OpenAI API key (required when ``provider == "OpenAI"``).
        google_key: Google API key (required for any other provider value).
        task_type: ``"Image"``, ``"Text"`` or ``"Infographic Spec"``.
        provider: ``"OpenAI"`` selects OpenAI; any other value selects Google.
        base_prompt: User prompt; must be non-blank.
        negative_prompt: Optional text appended as ``". Avoid: ..."``.
        preset: Visual preset name forwarded to ``apply_preset_to_prompt``.
        style: Style accent name forwarded to ``apply_preset_to_prompt``.
        size: Size label forwarded to the OpenAI image generator.
        quality: Quality value forwarded to the OpenAI image generator.
        n_images: Number of images requested (coerced to ``int``).
        seed: Seed value, used only when ``use_seed`` is true.
        use_seed: Whether ``seed`` is forwarded to the image generator.
        google_image_model: Google image model id; blank falls back to
            ``"nano-banana-pro"``.
        google_text_model_hint: Only recorded in the debug log.

    Returns:
        ``(text_output, images, debug_log)``. Provider errors are caught and
        reported as ``"Error: ..."`` with an empty image list.
    """
    text_output = ""
    images: List[Image.Image] = []
    debug_lines = []

    if not (base_prompt or "").strip():
        return "Please enter a prompt.", [], "No prompt provided."

    openai_key = (openai_key or "").strip()
    google_key = (google_key or "").strip()
    negative_prompt = (negative_prompt or "").strip()
    uses_openai = provider == "OpenAI"

    full_prompt = apply_preset_to_prompt(
        base_prompt=base_prompt,
        preset=preset,
        style=style,
        content_type=task_type,
    )

    if negative_prompt:
        full_prompt += f". Avoid: {negative_prompt}"

    debug_lines.append(f"Task: {task_type}")
    debug_lines.append(f"Provider: {provider}")
    debug_lines.append(f"Preset: {preset}, Style: {style}")
    debug_lines.append(f"OpenAI size: {size}, quality: {quality}")
    debug_lines.append(f"Google image model: {google_image_model}")
    debug_lines.append(f"Google text model hint: {google_text_model_hint}")
    debug_lines.append(f"Seed enabled: {use_seed}, seed: {seed if use_seed else 'None'}")

    try:
        # Gradio documents slider values as floats; the image generators need
        # real ints (e.g. for ``range``). Coerced inside the try so a bad
        # value is reported in the UI instead of crashing the callback.
        n_images = int(n_images)
        seed_val: Optional[int] = int(seed) if use_seed else None

        # TEXT / INFOGRAPHIC
        if task_type in ["Text", "Infographic Spec"]:
            if uses_openai:
                if not openai_key:
                    return "Missing OpenAI API key.", [], "OpenAI key not provided."
                text_output = generate_text_openai(
                    api_key=openai_key,
                    prompt=full_prompt,
                    mode=task_type,
                )
            else:
                if not google_key:
                    return "Missing Google API key.", [], "Google key not provided."
                text_output = generate_text_google(
                    api_key=google_key,
                    prompt=full_prompt,
                    mode=task_type,
                )

        # IMAGE
        elif task_type == "Image":
            if uses_openai:
                if not openai_key:
                    return "Missing OpenAI API key.", [], "OpenAI key not provided."

                # Default to GPT-Image-1; for Palantir preset, swap to DALL·E 3
                image_model = "dall-e-3" if "Palantir" in preset else "gpt-image-1"
                # Logged before the call so the model also shows up in the
                # debug output when the request fails.
                debug_lines.append(f"OpenAI image model: {image_model}")

                images = generate_image_openai(
                    api_key=openai_key,
                    model=image_model,
                    prompt=full_prompt,
                    size=size,
                    quality=quality,
                    n_images=n_images,
                    seed=seed_val,
                )
            else:
                if not google_key:
                    return "Missing Google API key.", [], "Google key not provided."
                images = generate_image_google(
                    api_key=google_key,
                    google_image_model=(google_image_model or "").strip() or "nano-banana-pro",
                    prompt=full_prompt,
                    n_images=n_images,
                    seed=seed_val,
                )

            if not text_output:
                text_output = (
                    "Image(s) generated. Use Text / Infographic Spec mode to "
                    "generate captions, copy, or layout specs."
                )

            if not images:
                debug_lines.append("No images returned from provider.")

        return text_output, images, "\n".join(debug_lines)

    except Exception as e:
        # Top-level UI boundary: surface the failure in the UI instead of
        # letting the callback raise.
        debug_lines.append(f"Exception: {e}")
        return f"Error: {e}", [], "\n".join(debug_lines)


# -----------------------
# UI
# -----------------------

# Build the Gradio UI: a two-column layout with all inputs on the left and
# the text, image, and debug outputs on the right.
with gr.Blocks() as demo:  # <- no theme arg
    gr.Markdown(
        """
# 🧬 ZEN Omni Studio — Text • Images • Infographics

Multi-provider creator for the ZEN ecosystem:

- 🔑 Bring your own OpenAI + Google (Gemini / Nano-Banana / Nano-Banana-Pro) keys  
- 🎨 Generate **images** with presets + fine-grained controls  
- 🧠 Generate **text** and **infographic specs** for ZEN dashboards, posters, and more  
        """
    )

    with gr.Row():
        # Left column: credentials, task selection, prompt, and tuning controls.
        with gr.Column():
            gr.Markdown("### 🔐 API Keys (local to this session)")

            # Keys are entered as password fields and passed straight to
            # run_generation on each click.
            openai_key = gr.Textbox(
                label="OPENAI_API_KEY",
                type="password",
                placeholder="sk-...",
            )
            google_key = gr.Textbox(
                label="GOOGLE_API_KEY (Gemini / Nano-Banana)",
                type="password",
                placeholder="AIza...",
            )

            gr.Markdown("### 🎯 Task & Provider")
            task_type = gr.Radio(
                ["Image", "Text", "Infographic Spec"],
                value="Image",
                label="Task Type",
            )
            # run_generation compares this value against the exact string
            # "OpenAI"; any other choice is routed to Google.
            provider = gr.Radio(
                ["Google (Nano-Banana / Gemini)", "OpenAI"],
                value="Google (Nano-Banana / Gemini)",
                label="Primary Provider",
            )

            base_prompt = gr.Textbox(
                label="Main Prompt",
                lines=5,
                placeholder="Describe the ZEN image, text, or infographic you want.",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt (optional)",
                lines=2,
                placeholder="Things to avoid: low-res, clutter, warped text, etc.",
            )

            # Choices here must match the keys of the preset/style tables in
            # apply_preset_to_prompt; unmatched names add nothing to the prompt.
            with gr.Row():
                preset = gr.Dropdown(
                    [
                        "None",
                        "ZEN Glass Dashboard",
                        "Palantir / Anduril Infographic",
                        "Youth AI Literacy Poster",
                        "ZEN AI Arena Card",
                        "Blueprint / Systems Diagram",
                    ],
                    value="ZEN Glass Dashboard",
                    label="Visual Preset",
                )
                style = gr.Dropdown(
                    [
                        "Default",
                        "Photoreal",
                        "Illustration",
                        "Futuristic UI",
                        "Blueprint",
                        "Cinematic",
                    ],
                    value="Futuristic UI",
                    label="Style Accent",
                )

            gr.Markdown("### 🎛 OpenAI Image Controls")
            with gr.Row():
                # Size labels are looked up by exact string in
                # generate_image_openai.
                size = gr.Dropdown(
                    [
                        "Square (1024x1024)",
                        "Portrait (1024x1792)",
                        "Landscape (1792x1024)",
                    ],
                    value="Square (1024x1024)",
                    label="Aspect Ratio / Size",
                )
                quality = gr.Dropdown(
                    ["standard", "hd"],
                    value="hd",
                    label="Quality",
                )
                # Despite the section heading, the image count is passed to
                # the image function of whichever provider is selected.
                n_images = gr.Slider(
                    minimum=1,
                    maximum=4,
                    value=1,
                    step=1,
                    label="Number of Images",
                )

            # The seed is only forwarded by run_generation when the checkbox
            # is ticked; otherwise None is passed to the image functions.
            with gr.Row():
                use_seed = gr.Checkbox(
                    value=False,
                    label="Lock Seed (repeatable outputs)",
                )
                seed = gr.Slider(
                    minimum=1,
                    maximum=2**31 - 1,
                    value=12345,
                    step=1,
                    label="Seed",
                )

            gr.Markdown("### 🧪 Google Image / Text Model Hints")
            google_image_model = gr.Textbox(
                label="Google Image Model (default: nano-banana-pro)",
                value="nano-banana-pro",
                placeholder="e.g. nano-banana-pro or your exact model id",
            )
            # run_generation only writes this value to the debug log; it does
            # not select the text model.
            google_text_model_hint = gr.Textbox(
                label="Google Text Model Hint",
                value="gemini-1.5-pro",
                placeholder="Used internally as default text model.",
            )

            generate_btn = gr.Button("🚀 Generate", variant="primary")

        # Right column: the three outputs returned by run_generation.
        with gr.Column():
            gr.Markdown("### 📜 Text / Spec Output")
            text_output = gr.Markdown()

            gr.Markdown("### 🖼 Image Output")
            image_gallery = gr.Gallery(
                show_label=False,
                columns=2,
                height=500,
            )

            gr.Markdown("### 🧾 Debug / Logs")
            debug_output = gr.Textbox(
                label="Debug Info",
                lines=10,
            )

    # Inputs are passed positionally, so this list must stay in the same
    # order as run_generation's parameters; outputs map to its 3-tuple.
    generate_btn.click(
        fn=run_generation,
        inputs=[
            openai_key,
            google_key,
            task_type,
            provider,
            base_prompt,
            negative_prompt,
            preset,
            style,
            size,
            quality,
            n_images,
            seed,
            use_seed,
            google_image_model,
            google_text_model_hint,
        ],
        outputs=[text_output, image_gallery, debug_output],
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()