ZENLLC committed on
Commit
442ebe4
·
verified ·
1 Parent(s): a293d19

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +324 -0
app.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, io, json, zipfile, time, hashlib
2
+ from typing import List, Dict, Any, Optional
3
+ import gradio as gr
4
+ from pydantic import BaseModel, Field
5
+ from tenacity import retry, stop_after_attempt, wait_exponential
6
+
7
+ # --- Optional .env support (Space secrets still preferred) ---
8
+ try:
9
+ from dotenv import load_dotenv
10
+ load_dotenv()
11
+ except Exception:
12
+ pass
13
+
14
+ # --- Model SDKs ---
15
+ # OpenAI (GPT-5)
16
+ try:
17
+ from openai import OpenAI
18
+ except Exception:
19
+ OpenAI = None
20
+
21
+ # Anthropic (Claude Sonnet 4.5 / latest)
22
+ try:
23
+ import anthropic
24
+ except Exception:
25
+ anthropic = None
26
+
27
+ # Firecrawl official SDK (v2.x)
28
+ # Docs: https://docs.firecrawl.dev/features/search , /scrape , /crawl
29
+ # Example usage shows Firecrawl(api_key).search(..., scrape_options={formats: [...]})
30
+ from firecrawl import Firecrawl # type: ignore
31
+
32
+ # --------------------------
33
+ # Session key handling
34
+ # --------------------------
35
class Keys(BaseModel):
    """API credentials held for one UI session.

    Each field is optional and defaults to ``None``.
    """

    # OpenAI API key.
    openai: Optional[str] = Field(default=None)
    # Anthropic API key.
    anthropic: Optional[str] = Field(default=None)
    # Firecrawl API key.
    firecrawl: Optional[str] = Field(default=None)
39
+
40
def resolve_keys(session: Keys) -> Keys:
    """Return the effective keys for a session.

    For each provider the session value wins when it is truthy; otherwise the
    matching environment variable is read (``None`` when it is unset).
    """
    env_names = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "firecrawl": "FIRECRAWL_API_KEY",
    }
    effective = {
        field: getattr(session, field) or os.getenv(env_name)
        for field, env_name in env_names.items()
    }
    return Keys(**effective)
47
+
48
+ # --------------------------
49
+ # Firecrawl helpers
50
+ # --------------------------
51
def fc_client(session: Keys) -> Firecrawl:
    """Build a Firecrawl client from the resolved Firecrawl key.

    Raises:
        gr.Error: when neither the session nor the environment provides a key.
    """
    api_key = resolve_keys(session).firecrawl
    if api_key:
        return Firecrawl(api_key=api_key)
    raise gr.Error("Missing FIRECRAWL_API_KEY. Enter it in Keys ➜ Save.")
56
+
57
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=8),
    reraise=True,
)
def _fc_call(method, kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """Call a bound Firecrawl SDK method with up to three attempts.

    ``reraise=True`` lets the last underlying exception propagate instead of
    tenacity's ``RetryError``, so the UI shows the real failure.
    """
    return method(**kwargs)


def fc_search(
    session: Keys,
    query: str,
    limit: int = 5,
    scrape_formats: Optional[List[str]] = None,
    location: Optional[str] = None,
) -> Dict[str, Any]:
    """Run a Firecrawl search.

    Args:
        session: Session keys used to build the client.
        query: Search query string.
        limit: Maximum number of results requested.
        scrape_formats: When given, passed as ``scrape_options={"formats": ...}``.
        location: Optional location hint; omitted from the call when falsy.

    Raises:
        gr.Error: immediately (without retrying) when the Firecrawl key is missing.
    """
    # Build the client outside the retry loop: a missing key is a
    # configuration error and must not be retried with backoff.
    fc = fc_client(session)
    kwargs: Dict[str, Any] = {"query": query, "limit": limit}
    if location:
        kwargs["location"] = location
    if scrape_formats:
        # per docs: search(..., scrape_options={"formats": [...]})
        kwargs["scrape_options"] = {"formats": scrape_formats}
    return _fc_call(fc.search, kwargs)


def fc_scrape(
    session: Keys,
    url: str,
    formats: Optional[List[str]] = None,
    timeout_ms: Optional[int] = None,
    mobile: bool = False,
) -> Dict[str, Any]:
    """Scrape a single URL with Firecrawl.

    Args:
        session: Session keys used to build the client.
        url: Page to scrape.
        formats: Output formats; omitted from the call when falsy.
        timeout_ms: Forwarded as ``timeout`` when truthy.
        mobile: When true, ``mobile=True`` is forwarded.

    Raises:
        gr.Error: immediately (without retrying) when the Firecrawl key is missing.
    """
    fc = fc_client(session)
    kwargs: Dict[str, Any] = {"url": url}
    if formats:
        kwargs["formats"] = formats
    if timeout_ms:
        kwargs["timeout"] = timeout_ms
    if mobile:
        kwargs["mobile"] = True
    return _fc_call(fc.scrape, kwargs)


def fc_crawl(
    session: Keys,
    url: str,
    max_pages: int = 25,
    formats: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Crawl a site with Firecrawl starting from ``url``.

    Args:
        session: Session keys used to build the client.
        url: Base URL of the crawl.
        max_pages: Forwarded as the crawl ``limit``.
        formats: When given, passed as ``scrape_options={"formats": ...}``.

    Raises:
        gr.Error: immediately (without retrying) when the Firecrawl key is missing.
    """
    fc = fc_client(session)
    kwargs: Dict[str, Any] = {"url": url, "limit": max_pages}
    if formats:
        kwargs["scrape_options"] = {"formats": formats}
    return _fc_call(fc.crawl, kwargs)
87
+
88
+ # --------------------------
89
+ # LLM helpers (GPT-5 / Claude Sonnet)
90
+ # --------------------------
91
def use_openai(session: Keys):
    """Return an OpenAI client built from the resolved OpenAI key.

    Raises:
        gr.Error: when no key is available, or when the optional ``openai``
            import at the top of the file failed.
    """
    api_key = resolve_keys(session).openai
    if not api_key:
        raise gr.Error("Missing OPENAI_API_KEY.")
    sdk_available = OpenAI is not None
    if not sdk_available:
        raise gr.Error("OpenAI SDK not installed.")
    return OpenAI(api_key=api_key)
98
+
99
def use_anthropic(session: Keys):
    """Return an Anthropic client built from the resolved Anthropic key.

    Raises:
        gr.Error: when no key is available, or when the optional ``anthropic``
            import at the top of the file failed.
    """
    api_key = resolve_keys(session).anthropic
    if not api_key:
        raise gr.Error("Missing ANTHROPIC_API_KEY.")
    sdk_available = anthropic is not None
    if not sdk_available:
        raise gr.Error("Anthropic SDK not installed.")
    return anthropic.Anthropic(api_key=api_key)
106
+
107
# Default system prompt sent with every LLM request (see llm_summarize).
SYSTEM_STEER = " ".join(
    [
        "You are ZEN's VibeCoder: extract web insights, generate clean scaffolds, "
        "and produce production-ready artifacts.",
        "Prefer structured outlines, code blocks, and checklists.",
        "When asked to clone or refactor, output file trees and exact text.",
    ]
)
112
+
113
def llm_summarize(session: Keys, provider: str, model_name: str, prompt: str, context_md: str, temp: float=0.4) -> str:
    """Send ``prompt`` plus source markdown to the chosen provider and return the text reply.

    ``provider == "openai"`` uses the OpenAI chat-completions call; any other
    value uses the Anthropic messages call. ``context_md`` is cut to its first
    150000 characters before being appended to the prompt.
    """
    via_openai = provider == "openai"
    # The client is created first so a missing key/SDK error is raised before
    # anything else is evaluated.
    client = use_openai(session) if via_openai else use_anthropic(session)
    user_message = {
        "role": "user",
        "content": f"{prompt}\n\n=== SOURCE (markdown) ===\n{context_md[:150000]}",
    }

    if not via_openai:
        reply = client.messages.create(
            model=model_name,  # e.g., "claude-3-5-sonnet-latest" or "claude-sonnet-4.5"
            max_tokens=4000,
            temperature=temp,
            system=SYSTEM_STEER,
            messages=[user_message],
        )
        # Only content blocks that expose a ``text`` attribute are joined.
        return "".join(block.text for block in reply.content if hasattr(block, "text"))

    reply = client.chat.completions.create(
        model=model_name,  # e.g., "gpt-5"
        temperature=temp,
        messages=[
            {"role": "system", "content": SYSTEM_STEER},
            user_message,
        ],
    )
    text = reply.choices[0].message.content
    return text if text else ""
137
+
138
+ # --------------------------
139
+ # ZIP export
140
+ # --------------------------
141
def pack_zip(pages: List[Dict[str, Any]]) -> bytes:
    """Pack crawled pages into an in-memory ZIP archive and return its bytes.

    For page number ``i`` (1-based) the archive gets ``{i:03d}_{slug}.md`` and/or
    ``{i:03d}_{slug}.html`` when markdown/HTML content is present, where ``slug``
    is the first 10 hex characters of the SHA-1 of the page URL. A
    ``manifest.json`` listing ``url``, ``title`` and ``links`` for every page is
    always written.

    Args:
        pages: Page dicts. Content is looked up at the top level first, then
            under ``"data"``; URL/title fall back to ``"metadata"``.

    Returns:
        The raw bytes of the ZIP archive.
    """

    def _as_dict(value: Any) -> Dict[str, Any]:
        # A key may be present with an explicit None (or another non-dict);
        # treat that the same as "missing" instead of raising AttributeError.
        return value if isinstance(value, dict) else {}

    buffer = io.BytesIO()
    manifest = []
    with zipfile.ZipFile(buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
        for i, page in enumerate(pages, start=1):
            meta = _as_dict(page.get("metadata"))
            data = _as_dict(page.get("data"))

            url = page.get("url") or meta.get("sourceURL") or f"page_{i}"
            slug = hashlib.sha1(url.encode("utf-8")).hexdigest()[:10]
            md = page.get("markdown") or data.get("markdown") or page.get("content") or ""
            html = page.get("html") or data.get("html") or ""
            links = page.get("links") or data.get("links") or []

            # write markdown/html if present
            if md:
                zf.writestr(f"{i:03d}_{slug}.md", md)
            if html:
                zf.writestr(f"{i:03d}_{slug}.html", html)

            # lightweight json record
            manifest.append(
                {
                    "url": url,
                    "title": page.get("title") or meta.get("title"),
                    "links": links,
                }
            )
        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
    return buffer.getvalue()
162
+
163
+ # --------------------------
164
+ # Gradio actions
165
+ # --------------------------
166
def save_keys(openai_key, anthropic_key, firecrawl_key):
    """Store the keys typed into the UI in a fresh session ``Keys`` object.

    Each value is stripped; blank or missing input becomes ``None``.

    Returns:
        A ``(Keys, message)`` pair: the new session state and the status text
        for the Markdown component wired as the second output.
    """

    def _clean(raw):
        # Textboxes may deliver None as well as ""; both mean "not provided".
        return (raw or "").strip() or None

    session = Keys(
        openai=_clean(openai_key),
        anthropic=_clean(anthropic_key),
        firecrawl=_clean(firecrawl_key),
    )
    message = "Keys saved to this session. (Env vars still apply if set.)"
    # gr.Info raises a toast; its return value is not meant to be used as
    # component content, so the status text is returned explicitly instead.
    gr.Info(message)
    return session, message
171
+
172
def action_search(session: Keys, query: str, limit: int, scrape_content: bool, location: str):
    """Run a Firecrawl search and return the results as pretty-printed JSON.

    Args:
        session: Session keys.
        query: Search text; must not be blank.
        limit: Maximum result count (coerced to ``int``).
        scrape_content: When true, results are also scraped as markdown + links.
        location: Optional location hint; blank means none.

    Returns:
        A JSON string holding a flat list of result items.

    Raises:
        gr.Error: when the query is empty.
    """
    query = (query or "").strip()
    if not query:
        raise gr.Error("Enter a search query.")

    formats = ["markdown", "links"] if scrape_content else None
    result = fc_search(
        session,
        query=query,
        limit=int(limit),  # UI sliders can deliver floats
        scrape_formats=formats,
        location=(location or "").strip() or None,
    )

    # Normalize to a flat list of items for display
    data = result.get("data")
    if isinstance(data, dict):
        # data may contain keys like "web", "news", "images"; a bucket that is
        # present but None is treated as empty.
        items = [
            item
            for bucket in ("web", "news", "images")
            for item in (data.get(bucket) or [])
        ]
    elif isinstance(data, list):
        items = data
    else:
        items = []
    return json.dumps(items, indent=2)
188
+
189
def action_scrape(session: Keys, url: str, mobile: bool, formats_sel: List[str], timeout_ms: int):
    """Scrape one URL and return the raw response JSON plus its markdown.

    Args:
        session: Session keys.
        url: Page to scrape; must not be blank.
        mobile: Forwarded to ``fc_scrape``.
        formats_sel: Selected formats; defaults to markdown, html and links when empty.
        timeout_ms: Timeout in milliseconds; falsy means no explicit timeout.

    Returns:
        ``(pretty_json, markdown)`` where ``pretty_json`` is truncated to
        300000 characters and ``markdown`` is ``""`` when none was returned.

    Raises:
        gr.Error: when the URL is empty.
    """
    url = (url or "").strip()
    if not url:
        raise gr.Error("Enter a URL.")

    formats = formats_sel or ["markdown", "html", "links"]
    # A numeric input may deliver a float; forward a whole number of milliseconds.
    timeout = int(timeout_ms) if timeout_ms else None
    out = fc_scrape(session, url, formats=formats, timeout_ms=timeout, mobile=mobile)

    # Firecrawl returns a dict; try to surface key content
    pretty = json.dumps(out, indent=2)[:300000]
    md = out.get("markdown") or (out.get("data") or {}).get("markdown") or ""
    return pretty, md
198
+
199
def action_crawl(session: Keys, base_url: str, max_pages: int, formats_sel: List[str]):
    """Crawl a site, pack the pages into a ZIP on disk and expose it for download.

    Args:
        session: Session keys.
        base_url: Crawl starting point; must not be blank.
        max_pages: Page limit (coerced to ``int``).
        formats_sel: Selected formats; defaults to markdown and links when empty.

    Returns:
        ``(file_update, status_text)``: a component update that points the File
        output at the written ZIP and makes it visible, and a status message.

    Raises:
        gr.Error: when the URL is empty or the crawl yields no pages.
    """
    import tempfile

    base_url = (base_url or "").strip()
    if not base_url:
        raise gr.Error("Enter a base URL to crawl.")

    formats = formats_sel or ["markdown", "links"]
    out = fc_crawl(session, base_url, max_pages=int(max_pages), formats=formats)

    # Expect out["data"] as list of pages
    pages = out.get("data") if isinstance(out, dict) else None
    if not isinstance(pages, list) or not pages:
        raise gr.Error("Crawl returned no pages.")

    # The File component is given a filesystem path rather than an in-memory
    # buffer. Writing into a fresh temp directory keeps the download name
    # "site_clone.zip".
    out_dir = tempfile.mkdtemp(prefix="site_clone_")
    zip_path = os.path.join(out_dir, "site_clone.zip")
    with open(zip_path, "wb") as fh:
        fh.write(pack_zip(pages))

    # gr.update replaces the per-component ``gr.File.update`` classmethod,
    # which no longer exists in Gradio 4.
    return gr.update(value=zip_path, visible=True), f"Crawled {len(pages)} pages. ZIP is ready."
210
+
211
def action_generate(session: Keys, provider: str, model_name: str, sys_prompt: str, user_prompt: str, context_md: str, temp: float):
    """Assemble the final prompt and return the model's reply.

    A blank model name falls back to ``"gpt-5"`` for the ``"openai"`` provider
    and ``"claude-3-5-sonnet-latest"`` otherwise. A non-blank ``sys_prompt`` is
    prepended to the user prompt under a ``SYSTEM:`` header.

    Raises:
        gr.Error: when the user prompt is blank.
    """
    request = user_prompt.strip()
    if not request:
        raise gr.Error("Enter a prompt or click a starter tile.")

    chosen_model = model_name.strip()
    if not chosen_model:
        chosen_model = "gpt-5" if provider == "openai" else "claude-3-5-sonnet-latest"

    style = (sys_prompt or "").strip()
    header = "SYSTEM:\n" + style + "\n\n" if style else ""

    return llm_summarize(session, provider, chosen_model, header + request, context_md or "", temp=temp)
219
+
220
+ # --------------------------
221
+ # UI (Blocks)
222
+ # --------------------------
223
# Top-level UI definition. Widget nesting follows the blank-line grouping of
# the statements below.
with gr.Blocks(css="""
#keys .wrap.svelte-1ipelgc { filter: none !important; }
""") as demo:
    gr.Markdown("## ZEN VibeCoder — Web Clone & Research Foundry")
    session_state = gr.State(Keys())  # holds keys

    with gr.Accordion("🔐 Keys (session)", open=True):
        with gr.Row():
            # SECURITY: the inputs are intentionally NOT pre-filled from the
            # server environment. A component's initial value is sent to every
            # visitor's browser, which would expose the deployment's secrets.
            # resolve_keys() already falls back to the environment variables
            # when a field is left blank.
            openai_key = gr.Textbox(label="OPENAI_API_KEY (GPT-5)", type="password", placeholder="sk-...")
            anthropic_key = gr.Textbox(label="ANTHROPIC_API_KEY (Claude Sonnet)", type="password", placeholder="anthropic-key...")
            firecrawl_key = gr.Textbox(label="FIRECRAWL_API_KEY", type="password", placeholder="fc-...")
        save_btn = gr.Button("Save keys", variant="primary")
        save_msg = gr.Markdown()
        save_btn.click(save_keys, [openai_key, anthropic_key, firecrawl_key], [session_state, save_msg])

    with gr.Tabs():
        # --- TAB: Search ---
        with gr.Tab("🔎 Search"):
            query = gr.Textbox(label="Query", placeholder='ex: "best open-source vector databases in 2025 site:docs"')
            with gr.Row():
                limit = gr.Slider(1, 20, value=6, step=1, label="Limit")
                scrape_content = gr.Checkbox(label="Also scrape results (markdown + links)", value=True)
                location = gr.Textbox(label="Location (optional)", placeholder="ex: Germany")
            go_search = gr.Button("Run Search", variant="primary")
            search_json = gr.Code(label="Results JSON", language="json")
            go_search.click(action_search, [session_state, query, limit, scrape_content, location], [search_json])

        # --- TAB: Scrape / Crawl / Clone ---
        with gr.Tab("🕸️ Scrape • Crawl • Clone"):
            with gr.Row():
                target_url = gr.Textbox(label="URL to Scrape", placeholder="https://example.com")
                timeout_ms = gr.Number(label="Timeout (ms)", value=15000)
            with gr.Row():
                formats_sel = gr.CheckboxGroup(choices=["markdown", "html", "links", "screenshot"], value=["markdown", "links"], label="Formats")
                mobile = gr.Checkbox(label="Emulate mobile", value=False)
            run_scrape = gr.Button("Scrape URL", variant="primary")
            scrape_json = gr.Code(label="Raw Response (JSON)", language="json")
            scrape_md = gr.Markdown(label="Markdown Preview")
            run_scrape.click(action_scrape, [session_state, target_url, mobile, formats_sel, timeout_ms], [scrape_json, scrape_md])

            gr.Markdown("---")

            with gr.Row():
                base_url = gr.Textbox(label="Base URL to Crawl", placeholder="https://docs.firecrawl.dev")
                max_pages = gr.Slider(1, 200, value=25, step=1, label="Max Pages")
                formats_crawl = gr.CheckboxGroup(choices=["markdown", "html", "links"], value=["markdown", "links"], label="Crawl Formats")
            run_crawl = gr.Button("Crawl & Build ZIP", variant="primary")
            zip_file = gr.File(label="Clone ZIP", visible=False)
            crawl_status = gr.Markdown()
            run_crawl.click(action_crawl, [session_state, base_url, max_pages, formats_crawl], [zip_file, crawl_status])

        # --- TAB: Vibe Code (LLM Synthesis) ---
        with gr.Tab("✨ Vibe Code (Synthesis)"):
            with gr.Row():
                provider = gr.Radio(choices=["openai", "anthropic"], value="openai", label="Provider")
                model_name = gr.Textbox(label="Model (override)", placeholder="gpt-5 | claude-3-5-sonnet-latest")
                temp = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature")

            sys_prompt = gr.Textbox(label="System Style (optional)",
                                    value="Return structured outputs with file trees, code blocks and ordered steps. Be concise and concrete.")
            user_prompt = gr.Textbox(label="User Prompt", lines=6)
            ctx_md = gr.Textbox(label="Context (paste markdown from Scrape/Crawl)", lines=10)

            with gr.Row():
                gen_btn = gr.Button("Generate", variant="primary")
            out_md = gr.Markdown()

            # Starter Tiles
            gr.Markdown("**Starter Tiles**")
            with gr.Row():
                t1 = gr.Button("🔧 Clone Docs ➜ Clean Markdown ➜ README")
                t2 = gr.Button("🧭 Competitor Teardown ➜ Features • Pricing • Moats")
                t3 = gr.Button("🧪 API Wrapper ➜ Python Client (requests + retries)")
                t4 = gr.Button("📝 Landing Page Rewrite ➜ ZEN Tone")
                t5 = gr.Button("📊 Dataset Outline ➜ Schema + Fields + ETL")

            def fill_tile(tile: str):
                """Return the starter prompt text for tile id ``"t1"``..``"t5"``."""
                prompts = {
                    "t1": "Create a clean knowledge pack from the context, then output a README.md with:\n- Overview\n- Key features\n- Quickstart\n- API endpoints (if any)\n- Notes & gotchas\n- License\nAlso produce a /docs/ tree outline with suggested pages and headings.",
                    "t2": "From the context, produce a feature matrix, pricing table, ICP notes, moats/risks, and a market POV. Conclude with a ZEN playbook: 5 lever moves for advantage.",
                    "t3": "Using the context, design a Python client that wraps the target API with retry/backoff and typed responses. Output:\n- package layout\n- requirements\n- client.py\n- examples/\n- README with usage.\nInclude robust error handling.",
                    "t4": "Rewrite the landing page in ZEN brand voice: crisp headline, 3 value props, social proof, CTA, and a concise FAQ. Provide HTML sections and copy blocks.",
                    "t5": "Propose a dataset schema based on the context. Output a table of fields, types, constraints, and an ETL plan (sources, transforms, validation, freshness, monitoring).",
                }
                return prompts[tile]

            # Each tile only writes its canned prompt into the User Prompt box.
            t1.click(lambda: fill_tile("t1"), outputs=[user_prompt])
            t2.click(lambda: fill_tile("t2"), outputs=[user_prompt])
            t3.click(lambda: fill_tile("t3"), outputs=[user_prompt])
            t4.click(lambda: fill_tile("t4"), outputs=[user_prompt])
            t5.click(lambda: fill_tile("t5"), outputs=[user_prompt])

            gen_btn.click(action_generate, [session_state, provider, model_name, sys_prompt, user_prompt, ctx_md, temp], [out_md])

    gr.Markdown(
        "Built for **ZEN Arena** pipelines. Export ZIPs → ingest → credentialize achievements via ZEN Cards.\n"
        "Docs used for Firecrawl behavior: search/scrape/crawl endpoints."
    )
321
+
322
if __name__ == "__main__":
    # Executed as a script: start the Gradio app with default launch settings
    # (Spaces expects 'app' or launch() with the default port).
    demo.launch()