ZENLLC committed on
Commit
f151732
·
verified ·
1 Parent(s): b29274e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -57
app.py CHANGED
@@ -1,17 +1,17 @@
1
  import base64
2
  import io
3
- import os
4
  from typing import List, Tuple, Optional
5
 
6
  import gradio as gr
7
  from PIL import Image
8
 
 
9
  # -----------------------
10
- # Optional imports guarded at call-time
11
  # -----------------------
12
 
13
  def _get_openai_client(api_key: str):
14
- from openai import OpenAI # imported here to avoid issues if lib missing
15
  return OpenAI(api_key=api_key)
16
 
17
 
@@ -22,7 +22,7 @@ def _configure_google(api_key: str):
22
 
23
 
24
  # -----------------------
25
- # Helper: apply visual presets
26
  # -----------------------
27
 
28
  def apply_preset_to_prompt(
@@ -31,7 +31,6 @@ def apply_preset_to_prompt(
31
  style: str,
32
  content_type: str,
33
  ) -> str:
34
- """Augment the prompt with preset & style language."""
35
  base_prompt = base_prompt.strip()
36
 
37
  preset_addons = {
@@ -68,7 +67,6 @@ def apply_preset_to_prompt(
68
  "Cinematic": " cinematic lighting, dramatic composition, filmic contrast",
69
  }
70
 
71
- ct_addon = ""
72
  if content_type == "Image":
73
  ct_addon = " high-resolution concept art,"
74
  elif content_type == "Infographic Spec":
@@ -76,6 +74,8 @@ def apply_preset_to_prompt(
76
  " detailed infographic design specification, including layout regions, "
77
  "sections, labels, and visual hierarchy,"
78
  )
 
 
79
 
80
  extra = " ".join(
81
  x
@@ -92,11 +92,12 @@ def apply_preset_to_prompt(
92
  return f"{base_prompt}, {extra}"
93
  else:
94
  return extra.strip()
 
95
  return base_prompt or "high quality image"
96
 
97
 
98
  # -----------------------
99
- # OpenAI: Text + Image
100
  # -----------------------
101
 
102
  def generate_text_openai(
@@ -105,6 +106,7 @@ def generate_text_openai(
105
  mode: str,
106
  ) -> str:
107
  client = _get_openai_client(api_key)
 
108
  system_msg = (
109
  "You are an expert creator for the ZEN AI ecosystem. "
110
  "Write clear, concise, high-leverage content. "
@@ -114,7 +116,8 @@ def generate_text_openai(
114
 
115
  if mode == "Infographic Spec":
116
  user_prompt = (
117
- f"Create a Palantir/Anduril-level infographic specification based on:\n\n{prompt}\n\n"
 
118
  "Return:\n"
119
  "1) Title options\n"
120
  "2) 3–5 main sections\n"
@@ -125,7 +128,6 @@ def generate_text_openai(
125
  else:
126
  user_prompt = prompt
127
 
128
- # Using Chat Completions interface
129
  resp = client.chat.completions.create(
130
  model="gpt-4.1-mini",
131
  messages=[
@@ -157,7 +159,6 @@ def generate_image_openai(
157
  ) -> List[Image.Image]:
158
  client = _get_openai_client(api_key)
159
 
160
- # Map size choices to OpenAI-supported ones
161
  size_map = {
162
  "Square (1024x1024)": "1024x1024",
163
  "Portrait (1024x1792)": "1024x1792",
@@ -172,7 +173,6 @@ def generate_image_openai(
172
  "quality": quality,
173
  "n": n_images,
174
  }
175
- # seed is optional on some models; safe to include conditionally
176
  if seed is not None:
177
  kwargs["seed"] = seed
178
 
@@ -191,7 +191,6 @@ def generate_text_google(
191
  mode: str,
192
  ) -> str:
193
  genai = _configure_google(api_key)
194
- # Default to a strong text model
195
  model = genai.GenerativeModel("gemini-1.5-pro")
196
 
197
  if mode == "Infographic Spec":
@@ -221,10 +220,9 @@ def generate_image_google(
221
  seed: Optional[int],
222
  ) -> List[Image.Image]:
223
  """
224
- NOTE: Model & output handling may need adjustment depending on
225
- the exact Nano-Banana / Nano-Banana-Pro API you use in Google AI Studio.
226
-
227
- This assumes a GenerativeModel that returns inline image data.
228
  """
229
  genai = _configure_google(api_key)
230
  model = genai.GenerativeModel(google_image_model)
@@ -232,8 +230,6 @@ def generate_image_google(
232
  images: List[Image.Image] = []
233
 
234
  for i in range(n_images):
235
- # Some image models support generation_config with a seed;
236
- # here we pass it if present.
237
  generation_config = {}
238
  if seed is not None:
239
  generation_config["seed"] = seed + i
@@ -243,11 +239,12 @@ def generate_image_google(
243
  generation_config=generation_config or None,
244
  )
245
 
246
- # Try to pull image bytes from response parts
247
- for cand in resp.candidates:
248
- for part in cand.content.parts:
249
- if hasattr(part, "inline_data") and getattr(part.inline_data, "data", None):
250
- raw = base64.b64decode(part.inline_data.data)
 
251
  img = Image.open(io.BytesIO(raw)).convert("RGB")
252
  images.append(img)
253
 
@@ -255,7 +252,7 @@ def generate_image_google(
255
 
256
 
257
  # -----------------------
258
- # Core Gradio callback
259
  # -----------------------
260
 
261
  def run_generation(
@@ -273,11 +270,8 @@ def run_generation(
273
  seed: int,
274
  use_seed: bool,
275
  google_image_model: str,
276
- google_text_model_hint: str,
277
  ) -> Tuple[str, List[Image.Image], str]:
278
- """
279
- Returns: (text_output, images, debug_info)
280
- """
281
  text_output = ""
282
  images: List[Image.Image] = []
283
  debug_lines = []
@@ -285,7 +279,6 @@ def run_generation(
285
  if not base_prompt.strip():
286
  return "Please enter a prompt.", [], "No prompt provided."
287
 
288
- # Build full prompt for images
289
  content_type = "Image" if task_type == "Image" else task_type
290
  full_prompt = apply_preset_to_prompt(
291
  base_prompt=base_prompt,
@@ -300,7 +293,7 @@ def run_generation(
300
  debug_lines.append(f"Task: {task_type}")
301
  debug_lines.append(f"Provider: {provider}")
302
  debug_lines.append(f"Preset: {preset}, Style: {style}")
303
- debug_lines.append(f"OpenAI model size: {size}, quality: {quality}")
304
  debug_lines.append(f"Google image model: {google_image_model}")
305
  debug_lines.append(f"Google text model hint: {google_text_model_hint}")
306
  debug_lines.append(f"Seed enabled: {use_seed}, seed: {seed if use_seed else 'None'}")
@@ -308,7 +301,7 @@ def run_generation(
308
  seed_val: Optional[int] = seed if use_seed else None
309
 
310
  try:
311
- # TEXT or INFOGRAPHIC
312
  if task_type in ["Text", "Infographic Spec"]:
313
  if provider == "OpenAI":
314
  if not openai_key.strip():
@@ -332,11 +325,9 @@ def run_generation(
332
  if provider == "OpenAI":
333
  if not openai_key.strip():
334
  return "Missing OpenAI API key.", [], "OpenAI key not provided."
335
- # Decide OpenAI image model based on preset selection or UI (we can infer)
336
- # We'll expose choice via size/style; model stays fixed to user-facing dropdown externally
337
- # but here we assume they want GPT-Image-1 by default
338
  image_model = "gpt-image-1"
339
- # For Palantir/Anduril preset, sometimes DALL·E 3 is good – user can switch later by editing code.
340
  if "Palantir" in preset:
341
  image_model = "dall-e-3"
342
 
@@ -355,7 +346,7 @@ def run_generation(
355
  return "Missing Google API key.", [], "Google key not provided."
356
  images = generate_image_google(
357
  api_key=google_key.strip(),
358
- google_image_model=google_image_model.strip(),
359
  prompt=full_prompt,
360
  n_images=n_images,
361
  seed=seed_val,
@@ -363,8 +354,8 @@ def run_generation(
363
 
364
  if not text_output and task_type == "Image":
365
  text_output = (
366
- "Image(s) generated successfully. Add 'Text' or 'Infographic Spec' mode "
367
- "to generate descriptive copy or specs."
368
  )
369
 
370
  if not images and task_type == "Image":
@@ -373,29 +364,31 @@ def run_generation(
373
  return text_output, images, "\n".join(debug_lines)
374
 
375
  except Exception as e:
376
- return f"Error: {e}", [], "\n".join(debug_lines + [f"Exception: {e}"])
 
377
 
378
 
379
  # -----------------------
380
  # UI
381
  # -----------------------
382
 
383
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
384
  gr.Markdown(
385
  """
386
  # 🧬 ZEN Omni Studio — Text • Images • Infographics
387
 
388
- A unified creator for the ZEN ecosystem.
389
 
390
- - 🔑 **Bring your own keys** for OpenAI + Google (Gemini / Nano-Banana).
391
- - 🧠 Generate **text** and **infographic specs** for ZEN, AI literacy, dashboards, and more.
392
- - 🎨 Generate **high-quality images** with advanced controls and presets.
393
  """
394
  )
395
 
396
  with gr.Row():
397
  with gr.Column():
398
- gr.Markdown("### 🔐 API Keys (kept only in your browser session)")
 
399
  openai_key = gr.Textbox(
400
  label="OPENAI_API_KEY",
401
  type="password",
@@ -419,16 +412,15 @@ A unified creator for the ZEN ecosystem.
419
  label="Primary Provider",
420
  )
421
 
422
- # Prompt region
423
  base_prompt = gr.Textbox(
424
  label="Main Prompt",
425
  lines=5,
426
- placeholder="Describe what you want to create for ZEN (image, copy, infographic, etc.)",
427
  )
428
  negative_prompt = gr.Textbox(
429
  label="Negative Prompt (optional)",
430
  lines=2,
431
- placeholder="Things to avoid: low-res, cluttered, distorted text, etc.",
432
  )
433
 
434
  with gr.Row():
@@ -457,8 +449,7 @@ A unified creator for the ZEN ecosystem.
457
  label="Style Accent",
458
  )
459
 
460
- # OpenAI image options
461
- gr.Markdown("### 🎛 OpenAI Image Controls (DALL·E / GPT-Image)")
462
  with gr.Row():
463
  size = gr.Dropdown(
464
  [
@@ -497,14 +488,14 @@ A unified creator for the ZEN ecosystem.
497
 
498
  gr.Markdown("### 🧪 Google Image / Text Model Hints")
499
  google_image_model = gr.Textbox(
500
- label="Google Image Model (default: Nano-Banana-Pro)",
501
  value="nano-banana-pro",
502
- placeholder="e.g. nano-banana-pro (adjust to your actual model id in Google AI Studio)",
503
  )
504
  google_text_model_hint = gr.Textbox(
505
- label="Google Text Model Hint (for future tweaking)",
506
  value="gemini-1.5-pro",
507
- placeholder="Used in code as default Gemini text model.",
508
  )
509
 
510
  generate_btn = gr.Button("🚀 Generate", variant="primary")
@@ -515,19 +506,17 @@ A unified creator for the ZEN ecosystem.
515
 
516
  gr.Markdown("### 🖼 Image Output")
517
  image_gallery = gr.Gallery(
518
- label="Generated Images",
519
  show_label=False,
520
  columns=2,
521
  height=500,
522
  )
523
 
524
- gr.Markdown("### 🧾 Debug / Logs (for you, not end users)")
525
  debug_output = gr.Textbox(
526
  label="Debug Info",
527
  lines=10,
528
  )
529
 
530
- # Wire up callback
531
  generate_btn.click(
532
  fn=run_generation,
533
  inputs=[
 
1
  import base64
2
  import io
 
3
  from typing import List, Tuple, Optional
4
 
5
  import gradio as gr
6
  from PIL import Image
7
 
8
+
9
  # -----------------------
10
+ # OpenAI + Google helpers
11
  # -----------------------
12
 
13
  def _get_openai_client(api_key: str):
14
+ from openai import OpenAI # local import so app still loads if lib missing
15
  return OpenAI(api_key=api_key)
16
 
17
 
 
22
 
23
 
24
  # -----------------------
25
+ # Prompt / preset logic
26
  # -----------------------
27
 
28
  def apply_preset_to_prompt(
 
31
  style: str,
32
  content_type: str,
33
  ) -> str:
 
34
  base_prompt = base_prompt.strip()
35
 
36
  preset_addons = {
 
67
  "Cinematic": " cinematic lighting, dramatic composition, filmic contrast",
68
  }
69
 
 
70
  if content_type == "Image":
71
  ct_addon = " high-resolution concept art,"
72
  elif content_type == "Infographic Spec":
 
74
  " detailed infographic design specification, including layout regions, "
75
  "sections, labels, and visual hierarchy,"
76
  )
77
+ else:
78
+ ct_addon = ""
79
 
80
  extra = " ".join(
81
  x
 
92
  return f"{base_prompt}, {extra}"
93
  else:
94
  return extra.strip()
95
+
96
  return base_prompt or "high quality image"
97
 
98
 
99
  # -----------------------
100
+ # OpenAI text + images
101
  # -----------------------
102
 
103
  def generate_text_openai(
 
106
  mode: str,
107
  ) -> str:
108
  client = _get_openai_client(api_key)
109
+
110
  system_msg = (
111
  "You are an expert creator for the ZEN AI ecosystem. "
112
  "Write clear, concise, high-leverage content. "
 
116
 
117
  if mode == "Infographic Spec":
118
  user_prompt = (
119
+ "Create a Palantir/Anduril-level infographic specification based on this topic:\n\n"
120
+ f"{prompt}\n\n"
121
  "Return:\n"
122
  "1) Title options\n"
123
  "2) 3–5 main sections\n"
 
128
  else:
129
  user_prompt = prompt
130
 
 
131
  resp = client.chat.completions.create(
132
  model="gpt-4.1-mini",
133
  messages=[
 
159
  ) -> List[Image.Image]:
160
  client = _get_openai_client(api_key)
161
 
 
162
  size_map = {
163
  "Square (1024x1024)": "1024x1024",
164
  "Portrait (1024x1792)": "1024x1792",
 
173
  "quality": quality,
174
  "n": n_images,
175
  }
 
176
  if seed is not None:
177
  kwargs["seed"] = seed
178
 
 
191
  mode: str,
192
  ) -> str:
193
  genai = _configure_google(api_key)
 
194
  model = genai.GenerativeModel("gemini-1.5-pro")
195
 
196
  if mode == "Infographic Spec":
 
220
  seed: Optional[int],
221
  ) -> List[Image.Image]:
222
  """
223
+ This assumes your Nano-Banana / Nano-Banana-Pro image model in
224
+ Google AI Studio returns inline image bytes in the response.
225
+ Adjust parsing if your model behaves differently.
 
226
  """
227
  genai = _configure_google(api_key)
228
  model = genai.GenerativeModel(google_image_model)
 
230
  images: List[Image.Image] = []
231
 
232
  for i in range(n_images):
 
 
233
  generation_config = {}
234
  if seed is not None:
235
  generation_config["seed"] = seed + i
 
239
  generation_config=generation_config or None,
240
  )
241
 
242
+ # Extract images from candidates
243
+ for cand in getattr(resp, "candidates", []):
244
+ for part in getattr(cand, "content", {}).parts:
245
+ inline = getattr(part, "inline_data", None)
246
+ if inline and getattr(inline, "data", None):
247
+ raw = base64.b64decode(inline.data)
248
  img = Image.open(io.BytesIO(raw)).convert("RGB")
249
  images.append(img)
250
 
 
252
 
253
 
254
  # -----------------------
255
+ # Core callback
256
  # -----------------------
257
 
258
  def run_generation(
 
270
  seed: int,
271
  use_seed: bool,
272
  google_image_model: str,
273
+ google_text_model_hint: str, # currently just logged
274
  ) -> Tuple[str, List[Image.Image], str]:
 
 
 
275
  text_output = ""
276
  images: List[Image.Image] = []
277
  debug_lines = []
 
279
  if not base_prompt.strip():
280
  return "Please enter a prompt.", [], "No prompt provided."
281
 
 
282
  content_type = "Image" if task_type == "Image" else task_type
283
  full_prompt = apply_preset_to_prompt(
284
  base_prompt=base_prompt,
 
293
  debug_lines.append(f"Task: {task_type}")
294
  debug_lines.append(f"Provider: {provider}")
295
  debug_lines.append(f"Preset: {preset}, Style: {style}")
296
+ debug_lines.append(f"OpenAI size: {size}, quality: {quality}")
297
  debug_lines.append(f"Google image model: {google_image_model}")
298
  debug_lines.append(f"Google text model hint: {google_text_model_hint}")
299
  debug_lines.append(f"Seed enabled: {use_seed}, seed: {seed if use_seed else 'None'}")
 
301
  seed_val: Optional[int] = seed if use_seed else None
302
 
303
  try:
304
+ # TEXT / INFOGRAPHIC
305
  if task_type in ["Text", "Infographic Spec"]:
306
  if provider == "OpenAI":
307
  if not openai_key.strip():
 
325
  if provider == "OpenAI":
326
  if not openai_key.strip():
327
  return "Missing OpenAI API key.", [], "OpenAI key not provided."
328
+
329
+ # Default to GPT-Image-1; for Palantir preset, swap to DALL·E 3
 
330
  image_model = "gpt-image-1"
 
331
  if "Palantir" in preset:
332
  image_model = "dall-e-3"
333
 
 
346
  return "Missing Google API key.", [], "Google key not provided."
347
  images = generate_image_google(
348
  api_key=google_key.strip(),
349
+ google_image_model=google_image_model.strip() or "nano-banana-pro",
350
  prompt=full_prompt,
351
  n_images=n_images,
352
  seed=seed_val,
 
354
 
355
  if not text_output and task_type == "Image":
356
  text_output = (
357
+ "Image(s) generated. Use Text / Infographic Spec mode to "
358
+ "generate captions, copy, or layout specs."
359
  )
360
 
361
  if not images and task_type == "Image":
 
364
  return text_output, images, "\n".join(debug_lines)
365
 
366
  except Exception as e:
367
+ debug_lines.append(f"Exception: {e}")
368
+ return f"Error: {e}", [], "\n".join(debug_lines)
369
 
370
 
371
  # -----------------------
372
  # UI
373
  # -----------------------
374
 
375
+ with gr.Blocks() as demo: # <- no theme arg
376
  gr.Markdown(
377
  """
378
  # 🧬 ZEN Omni Studio — Text • Images • Infographics
379
 
380
+ Multi-provider creator for the ZEN ecosystem:
381
 
382
+ - 🔑 Bring your own OpenAI + Google (Gemini / Nano-Banana / Nano-Banana-Pro) keys
383
+ - 🎨 Generate **images** with presets + fine-grained controls
384
+ - 🧠 Generate **text** and **infographic specs** for ZEN dashboards, posters, and more
385
  """
386
  )
387
 
388
  with gr.Row():
389
  with gr.Column():
390
+ gr.Markdown("### 🔐 API Keys (local to this session)")
391
+
392
  openai_key = gr.Textbox(
393
  label="OPENAI_API_KEY",
394
  type="password",
 
412
  label="Primary Provider",
413
  )
414
 
 
415
  base_prompt = gr.Textbox(
416
  label="Main Prompt",
417
  lines=5,
418
+ placeholder="Describe the ZEN image, text, or infographic you want.",
419
  )
420
  negative_prompt = gr.Textbox(
421
  label="Negative Prompt (optional)",
422
  lines=2,
423
+ placeholder="Things to avoid: low-res, clutter, warped text, etc.",
424
  )
425
 
426
  with gr.Row():
 
449
  label="Style Accent",
450
  )
451
 
452
+ gr.Markdown("### 🎛 OpenAI Image Controls")
 
453
  with gr.Row():
454
  size = gr.Dropdown(
455
  [
 
488
 
489
  gr.Markdown("### 🧪 Google Image / Text Model Hints")
490
  google_image_model = gr.Textbox(
491
+ label="Google Image Model (default: nano-banana-pro)",
492
  value="nano-banana-pro",
493
+ placeholder="e.g. nano-banana-pro or your exact model id",
494
  )
495
  google_text_model_hint = gr.Textbox(
496
+ label="Google Text Model Hint",
497
  value="gemini-1.5-pro",
498
+ placeholder="Used internally as default text model.",
499
  )
500
 
501
  generate_btn = gr.Button("🚀 Generate", variant="primary")
 
506
 
507
  gr.Markdown("### 🖼 Image Output")
508
  image_gallery = gr.Gallery(
 
509
  show_label=False,
510
  columns=2,
511
  height=500,
512
  )
513
 
514
+ gr.Markdown("### 🧾 Debug / Logs")
515
  debug_output = gr.Textbox(
516
  label="Debug Info",
517
  lines=10,
518
  )
519
 
 
520
  generate_btn.click(
521
  fn=run_generation,
522
  inputs=[