import os, io, json, zipfile, hashlib, time
from typing import List, Dict, Any, Optional, Tuple

import gradio as gr
from pydantic import BaseModel
from tenacity import retry, stop_after_attempt, wait_exponential, RetryError

try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass

try:
    from openai import OpenAI
except Exception:
    OpenAI = None

try:
    import anthropic
    from anthropic import NotFoundError as AnthropicNotFound
except Exception:
    anthropic = None
    AnthropicNotFound = Exception

from firecrawl import Firecrawl
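

# Helpers: convert SDK response objects (pydantic models, nested objects,
# lists) into plain dicts/lists so they can be JSON-dumped, filtered,
# and zipped uniformly.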
def _to_dict(obj: Any) -> Any:
    if isinstance(obj, BaseModel):
        return obj.model_dump()
    if isinstance(obj, dict):
        return {k: _to_dict(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_to_dict(v) for v in obj]
    if hasattr(obj, "__dict__") and not isinstance(obj, (str, bytes)):
        try:
            return {k: _to_dict(v) for k, v in vars(obj).items()}
        except Exception:
            pass
    return obj


def _pretty_json(data: Any, limit: int = 300_000) -> str:
    try:
        s = json.dumps(_to_dict(data), indent=2)
        return s[:limit]
    except Exception as e:
        return f"<!> Could not serialize to JSON: {e}"


def _listify(x) -> List[Any]:
    if x is None:
        return []
    if isinstance(x, list):
        return x
    return [x]


def _hash(s: str) -> str:
    return hashlib.sha1(s.encode("utf-8")).hexdigest()[:10]
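

import tempfile


def _bytes_to_file(data: bytes, filename: str) -> str:
    # Small helper used by the ZIP-returning callbacks below: gr.File is fed a
    # filesystem path (it does not accept BytesIO), so payloads are staged in
    # the system temp directory. Assumes that directory is writable.
    path = os.path.join(tempfile.gettempdir(), filename)
    with open(path, "wb") as f:
        f.write(data)
    return path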


class Keys(BaseModel):
    openai: Optional[str] = None
    anthropic: Optional[str] = None
    firecrawl: Optional[str] = None


def resolve_keys(s: Keys) -> Keys:
    return Keys(
        openai=s.openai or os.getenv("OPENAI_API_KEY"),
        anthropic=s.anthropic or os.getenv("ANTHROPIC_API_KEY"),
        firecrawl=s.firecrawl or os.getenv("FIRECRAWL_API_KEY"),
    )
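

# Firecrawl wrappers: each call resolves keys for the current session, raises
# a gr.Error if the Firecrawl key is missing, and retries transient failures
# with exponential backoff via tenacity.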
def fc_client(s: Keys) -> Firecrawl:
    k = resolve_keys(s)
    if not k.firecrawl:
        raise gr.Error("Missing FIRECRAWL_API_KEY. Enter it in Keys → Save.")
    return Firecrawl(api_key=k.firecrawl)


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=8))
def fc_search(s: Keys, query: str, limit: int = 5, scrape_formats: Optional[List[str]] = None, location: Optional[str] = None) -> Dict[str, Any]:
    fc = fc_client(s)
    kwargs: Dict[str, Any] = {"query": query, "limit": limit}
    if location: kwargs["location"] = location
    if scrape_formats: kwargs["scrape_options"] = {"formats": scrape_formats}
    res = fc.search(**kwargs)
    return _to_dict(res)


@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=1, max=10))
def fc_scrape(s: Keys, url: str, formats: Optional[List[str]] = None, timeout_ms: Optional[int] = None, mobile: bool = False) -> Dict[str, Any]:
    fc = fc_client(s)
    kwargs: Dict[str, Any] = {"url": url}
    if formats: kwargs["formats"] = formats
    if timeout_ms: kwargs["timeout"] = min(int(timeout_ms), 40000)
    if mobile: kwargs["mobile"] = True
    res = fc.scrape(**kwargs)
    return _to_dict(res)


@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=1, max=10))
def fc_crawl(s: Keys, url: str, max_pages: int = 25, formats: Optional[List[str]] = None) -> Dict[str, Any]:
    fc = fc_client(s)
    kwargs: Dict[str, Any] = {"url": url, "limit": max_pages}
    if formats: kwargs["scrape_options"] = {"formats": formats}
    res = fc.crawl(**kwargs)
    return _to_dict(res)


SYSTEM_STEER = (
    "You are ZEN's VibeCoder: extract web insights, generate clean scaffolds, "
    "and produce production-ready artifacts. Prefer structured outlines, code blocks, and checklists. "
    "When asked to clone or refactor, output file trees and exact text."
)


def use_openai(s: Keys):
    k = resolve_keys(s)
    if not k.openai: raise gr.Error("Missing OPENAI_API_KEY.")
    if OpenAI is None: raise gr.Error("OpenAI SDK not installed.")
    return OpenAI(api_key=k.openai)


def use_anthropic(s: Keys):
    k = resolve_keys(s)
    if not k.anthropic: raise gr.Error("Missing ANTHROPIC_API_KEY.")
    if anthropic is None: raise gr.Error("Anthropic SDK not installed.")
    return anthropic.Anthropic(api_key=k.anthropic)


ANTHROPIC_FALLBACKS = [
    "claude-3-7-sonnet-20250219",
    "claude-3-7-sonnet-latest",
    "claude-3-5-sonnet-20241022",
    "claude-3-5-sonnet-20240620",
]
OPENAI_FALLBACKS = ["gpt-5", "gpt-4.1", "gpt-4o", "gpt-4o-mini"]


def llm_once_openai(s: Keys, model: str, prompt: str, ctx: str, temp: float) -> str:
    client = use_openai(s)
    resp = client.chat.completions.create(
        model=model, temperature=temp,
        messages=[{"role": "system", "content": SYSTEM_STEER},
                  {"role": "user", "content": f"{prompt}\n\n=== SOURCE (markdown) ===\n{ctx}"}]
    )
    return (resp.choices[0].message.content or "").strip()


def llm_once_anthropic(s: Keys, model: str, prompt: str, ctx: str, temp: float) -> str:
    client = use_anthropic(s)
    resp = client.messages.create(
        model=model, max_tokens=4000, temperature=temp, system=SYSTEM_STEER,
        messages=[{"role": "user", "content": f"{prompt}\n\n=== SOURCE (markdown) ===\n{ctx}"}],
    )
    out = []
    for blk in resp.content:
        t = getattr(blk, "text", None)
        if t: out.append(t)
    return "".join(out).strip()
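

# Fallback strategy: try the user-supplied model override first (if any),
# then walk the provider's fallback list; any per-model failure advances to
# the next candidate and only the last error is surfaced to the UI.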
def llm_summarize(s: Keys, provider: str, model_name: str, prompt: str, ctx_md: str, temp: float = 0.4) -> str:
    ctx = (ctx_md or "")[:150000]
    if provider == "openai":
        candidates = [model_name] + OPENAI_FALLBACKS if model_name else OPENAI_FALLBACKS
        last = None
        for m in candidates:
            try: return llm_once_openai(s, m, prompt, ctx, temp)
            except Exception as e: last = e; continue
        raise gr.Error(f"OpenAI failed across fallbacks: {last}")
    else:
        candidates = [model_name] + ANTHROPIC_FALLBACKS if model_name else ANTHROPIC_FALLBACKS
        last = None
        for m in candidates:
            try: return llm_once_anthropic(s, m, prompt, ctx, temp)
            except AnthropicNotFound as e: last = e; continue
            except Exception as e: last = e; continue
        raise gr.Error(f"Anthropic failed across fallbacks: {last}")
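

# ZIP packaging: crawled pages are written as NNN_<urlhash>.md / .html plus a
# manifest.json of {url, title, links}; the corpus export bundles the merged
# markdown, a JSON manifest, and any extra files passed in.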
def pack_zip_pages(pages: List[Dict[str, Any]]) -> bytes:
    mem = io.BytesIO()
    with zipfile.ZipFile(mem, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
        manifest = []
        for i, p in enumerate(pages, start=1):
            url = p.get("url") or p.get("metadata", {}).get("sourceURL") or f"page_{i}"
            slug = _hash(str(url))
            md = p.get("markdown") or p.get("data", {}).get("markdown") or p.get("content") or ""
            html = p.get("html") or p.get("data", {}).get("html") or ""
            links = p.get("links") or p.get("data", {}).get("links") or []
            title = p.get("title") or p.get("metadata", {}).get("title")
            if md: zf.writestr(f"{i:03d}_{slug}.md", md)
            if html: zf.writestr(f"{i:03d}_{slug}.html", html)
            manifest.append({"url": url, "title": title, "links": links})
        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
    mem.seek(0); return mem.read()


def pack_zip_corpus(corpus: List[Dict[str, Any]], merged_md: str, extras: Dict[str, str]) -> bytes:
    mem = io.BytesIO()
    with zipfile.ZipFile(mem, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("corpus_merged.md", merged_md or "")
        zf.writestr("corpus_manifest.json", json.dumps(corpus, indent=2))
        for name, content in extras.items():
            zf.writestr(name, content)
    mem.seek(0); return mem.read()
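

# Gradio callbacks: each function below backs a UI event handler and returns
# values in the same order as the output components wired to it.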
def save_keys(openai_key, anthropic_key, firecrawl_key):
    keys = Keys(
        openai=(openai_key or "").strip() or None,
        anthropic=(anthropic_key or "").strip() or None,
        firecrawl=(firecrawl_key or "").strip() or None,
    )
    msg = "Keys saved to this session. (Env vars still apply if set.)"
    gr.Info(msg)
    return keys, msg


def action_search(sess: Keys, query: str, limit: int, scrape_content: bool, location: str):
    if not query.strip(): raise gr.Error("Enter a search query.")
    formats = ["markdown", "links"] if scrape_content else None
    res = fc_search(sess, query=query.strip(), limit=limit, scrape_formats=formats, location=(location or None))
    data = res.get("data", res)
    items: List[Any] = []
    if isinstance(data, dict):
        for bucket in ("web", "news", "images", "videos", "discussion"):
            b = data.get(bucket)
            if b:
                items.extend(_listify(_to_dict(b)))
    elif isinstance(data, list):
        items = _to_dict(data)
    else:
        items = _listify(_to_dict(data))
    if not items:
        return _pretty_json(res), res
    return json.dumps(items, indent=2), items


def action_scrape(sess: Keys, url: str, mobile: bool, formats_sel: List[str], timeout_ms: int):
    if not url.strip(): raise gr.Error("Enter a URL.")
    formats = formats_sel or ["markdown", "links"]
    try:
        out = fc_scrape(sess, url.strip(), formats=formats, timeout_ms=(timeout_ms or 15000), mobile=mobile)
        pretty = _pretty_json(out)
        md = out.get("markdown") or out.get("data", {}).get("markdown") or out.get("content") or ""
        return pretty, md, out
    except RetryError as e:
        return f"<!> Scrape timed out after retries. Try increasing timeout, unchecking 'mobile', or limiting formats.\n\n{e}", "", {}
    except Exception as e:
        return f"<!> Scrape error: {e}", "", {}


def action_crawl(sess: Keys, base_url: str, max_pages: int, formats_sel: List[str]):
    if not base_url.strip(): raise gr.Error("Enter a base URL to crawl.")
    formats = formats_sel or ["markdown", "links"]
    try:
        out = fc_crawl(sess, base_url.strip(), max_pages=max_pages, formats=formats)
        pages = out.get("data")
        if not isinstance(pages, list) or not pages: raise gr.Error("Crawl returned no pages.")
        zip_bytes = pack_zip_pages(pages)
        zip_path = _bytes_to_file(zip_bytes, "site_clone.zip")
        return gr.update(value=zip_path, visible=True), f"Crawled {len(pages)} pages. ZIP is ready.", pages
    except RetryError as e:
        return gr.update(visible=False), f"<!> Crawl timed out after retries. Reduce Max Pages or try again.\n\n{e}", []
    except Exception as e:
        return gr.update(visible=False), f"<!> Crawl error: {e}", []


def action_generate(sess: Keys, provider: str, model_name: str, sys_prompt: str, user_prompt: str, context_md: str, temp: float):
    if not user_prompt.strip(): raise gr.Error("Enter a prompt or click a starter tile.")
    model = (model_name or "").strip()
    steer = (sys_prompt or "").strip()
    prompt = (("SYSTEM:\n" + steer + "\n\n") if steer else "") + user_prompt.strip()
    out = llm_summarize(sess, provider, model, prompt, context_md or "", temp=temp)
    return out
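

# Corpus records are normalized to one flat shape ({url, title, markdown,
# html, links}) regardless of whether they came from search, scrape, or crawl.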
def corpus_normalize_items(items: Any) -> List[Dict[str, Any]]:
    """Accepts list/dict/raw and returns a list of page-like dicts with url/title/markdown/html/links."""
    out = []
    if isinstance(items, dict): items = [items]
    for it in _listify(items):
        d = _to_dict(it)
        if not isinstance(d, dict): continue
        url = d.get("url") or d.get("metadata", {}).get("sourceURL") or d.get("link") or ""
        title = d.get("title") or d.get("metadata", {}).get("title") or d.get("name") or ""
        md = d.get("markdown") or d.get("data", {}).get("markdown") or d.get("content") or ""
        html = d.get("html") or d.get("data", {}).get("html") or ""
        links = d.get("links") or d.get("data", {}).get("links") or []
        out.append({"url": url, "title": title, "markdown": md, "html": html, "links": links})
    return out


def corpus_add(corpus: List[Dict[str, Any]], items: Any, include_filter: str, exclude_filter: str, dedupe: bool) -> Tuple[List[Dict[str, Any]], str]:
    added = 0
    existing = set(_hash(x.get("url", "")) for x in corpus if x.get("url"))
    inc = (include_filter or "").strip().lower()
    exc = (exclude_filter or "").strip().lower()
    for rec in corpus_normalize_items(items):
        url = (rec.get("url") or "").lower()
        title = (rec.get("title") or "").lower()
        if inc and (inc not in url and inc not in title): continue
        if exc and (exc in url or exc in title): continue
        if dedupe and rec.get("url") and _hash(rec["url"]) in existing: continue
        corpus.append(rec); added += 1
        if rec.get("url"): existing.add(_hash(rec["url"]))
    return corpus, f"Added {added} item(s). Corpus size: {len(corpus)}."


def corpus_list(corpus: List[Dict[str, Any]]) -> str:
    lines = []
    for i, rec in enumerate(corpus, 1):
        url = rec.get("url") or "(no url)"
        title = rec.get("title") or "(no title)"
        mlen = len(rec.get("markdown") or "")
        lines.append(f"{i:03d}. {title} — {url} [md:{mlen} chars]")
    if not lines: return "_(empty)_"
    return "\n".join(lines)


def corpus_clear() -> Tuple[List[Dict[str, Any]], str]:
    return [], "Corpus cleared."


def corpus_merge_md(corpus: List[Dict[str, Any]]) -> str:
    parts = []
    for rec in corpus:
        hdr = f"### {rec.get('title') or rec.get('url') or 'Untitled'}"
        md = rec.get("markdown") or ""
        if md: parts.append(hdr + "\n\n" + md.strip())
    return "\n\n---\n\n".join(parts)


def corpus_export(corpus: List[Dict[str, Any]], merged: str, extras: Dict[str, str]):
    data = pack_zip_corpus(corpus, merged, extras)
    path = _bytes_to_file(data, f"corpus_{int(time.time())}.zip")
    return gr.update(value=path, visible=True)


def dual_generate(sess: Keys, model_openai: str, model_anthropic: str, sys_prompt: str, user_prompt: str, ctx_md: str, temp: float):
    if not user_prompt.strip(): raise gr.Error("Enter a prompt or use a tile.")
    steer = (sys_prompt or "").strip()
    prompt = (("SYSTEM:\n" + steer + "\n\n") if steer else "") + user_prompt.strip()
    ctx = ctx_md or ""

    oa_txt, an_txt = "", ""
    try:
        oa_txt = llm_summarize(sess, "openai", model_openai or "", prompt, ctx, temp)
    except Exception as e:
        oa_txt = f"<!> OpenAI error: {e}"
    try:
        an_txt = llm_summarize(sess, "anthropic", model_anthropic or "", prompt, ctx, temp)
    except Exception as e:
        an_txt = f"<!> Anthropic error: {e}"

    md = (
        "### OpenAI\n\n" + (oa_txt or "_(empty)_") +
        "\n\n---\n\n" +
        "### Anthropic\n\n" + (an_txt or "_(empty)_")
    )
    return md


def scaffold_from_corpus(corpus_md: str, site_name: str = "zen-scan"):
    """
    Produce a tiny site/docs scaffold as a ZIP:
      /README.md
      /docs/index.md   (from corpus)
      /docs/summary.md (brief)
    """
    summary = (corpus_md[:1800] + ("..." if len(corpus_md) > 1800 else "")) if corpus_md else "No content."
    mem = io.BytesIO()
    with zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("README.md", f"# {site_name}\n\nAuto-generated scaffold from ZEN VibeCoder corpus.\n")
        zf.writestr("docs/index.md", corpus_md or "# Empty\n")
        zf.writestr("docs/summary.md", f"# Summary\n\n{summary}\n")
    mem.seek(0)
    path = _bytes_to_file(mem.read(), f"{site_name}_scaffold.zip")
    return gr.update(value=path, visible=True)
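

# ---------------- Gradio UI ----------------
# One Blocks app: session-scoped key state, plus tabs for Search,
# Scrape/Crawl/Clone, Corpus & Build, Vibe Code, and Dual Synth.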
with gr.Blocks(css="#keys .wrap.svelte-1ipelgc { filter: none !important; }") as demo:
    gr.Markdown("## ZEN VibeCoder — Web Clone & Research Foundry")
    session_state = gr.State(Keys())

    last_search_obj = gr.State({})
    last_scrape_obj = gr.State({})
    last_crawl_pages = gr.State([])
    corpus_state = gr.State([])
    merged_md_state = gr.State("")

    with gr.Accordion("🔐 Keys (session)", open=True, elem_id="keys"):  # elem_id is targeted by the css above
        with gr.Row():
            openai_key = gr.Textbox(label="OPENAI_API_KEY (GPT-5 / fallbacks)", type="password", placeholder="sk-...", value=os.getenv("OPENAI_API_KEY") or "")
            anthropic_key = gr.Textbox(label="ANTHROPIC_API_KEY (Claude Sonnet)", type="password", placeholder="anthropic-key...", value=os.getenv("ANTHROPIC_API_KEY") or "")
            firecrawl_key = gr.Textbox(label="FIRECRAWL_API_KEY", type="password", placeholder="fc-...", value=os.getenv("FIRECRAWL_API_KEY") or "")
        save_btn = gr.Button("Save keys", variant="primary")
        save_msg = gr.Markdown()
        save_btn.click(save_keys, [openai_key, anthropic_key, firecrawl_key], [session_state, save_msg])

    with gr.Tabs():

        with gr.Tab("🔎 Search"):
            query = gr.Textbox(label="Query", placeholder='ex: site:docs "vector database" 2025')
            with gr.Row():
                limit = gr.Slider(1, 20, value=6, step=1, label="Limit")
                scrape_content = gr.Checkbox(label="Also scrape results (markdown + links)", value=True)
                location = gr.Textbox(label="Location (optional)", placeholder="ex: Germany")
            go_search = gr.Button("Run Search", variant="primary")
            search_json = gr.Code(label="Results JSON", language="json")

            def _search(sess, q, lmt, scp, loc):
                txt, obj = action_search(sess, q, lmt, scp, loc)
                return txt, obj
            go_search.click(_search, [session_state, query, limit, scrape_content, location], [search_json, last_search_obj])

        with gr.Tab("🕸️ Scrape • Crawl • Clone"):
            with gr.Row():
                target_url = gr.Textbox(label="URL to Scrape", placeholder="https://example.com")
                timeout_ms = gr.Number(label="Timeout (ms, max 40000)", value=15000)
            with gr.Row():
                formats_sel = gr.CheckboxGroup(choices=["markdown", "html", "links", "screenshot"], value=["markdown", "links"], label="Formats")
                mobile = gr.Checkbox(label="Emulate mobile", value=False)
            run_scrape = gr.Button("Scrape URL", variant="primary")
            scrape_json = gr.Code(label="Raw Response (JSON)", language="json")
            scrape_md = gr.Markdown(label="Markdown Preview")
            run_scrape.click(action_scrape, [session_state, target_url, mobile, formats_sel, timeout_ms], [scrape_json, scrape_md, last_scrape_obj])

            gr.Markdown("---")

            with gr.Row():
                base_url = gr.Textbox(label="Base URL to Crawl", placeholder="https://docs.firecrawl.dev")
                max_pages = gr.Slider(1, 200, value=25, step=1, label="Max Pages")
                formats_crawl = gr.CheckboxGroup(choices=["markdown", "html", "links"], value=["markdown", "links"], label="Crawl Formats")
            run_crawl = gr.Button("Crawl & Build ZIP", variant="primary")
            zip_file = gr.File(label="Clone ZIP", visible=False)
            crawl_status = gr.Markdown()
            run_crawl.click(action_crawl, [session_state, base_url, max_pages, formats_crawl], [zip_file, crawl_status, last_crawl_pages])

        with gr.Tab("📦 Corpus & Build"):
            with gr.Row():
                include_filter = gr.Textbox(label="Include filter (substring)", placeholder="docs, api, blog...")
                exclude_filter = gr.Textbox(label="Exclude filter (substring)", placeholder="cdn, tracking, terms...")
                dedupe = gr.Checkbox(label="Dedupe by URL", value=True)
            with gr.Row():
                add_from_search = gr.Button("Add from Last Search")
                add_from_scrape = gr.Button("Add from Last Scrape")
                add_from_crawl = gr.Button("Add from Last Crawl")
            status_corpus = gr.Markdown()
            corpus_list_md = gr.Markdown(label="Corpus Items")

            def do_add_from_search(corpus, items, inc, exc, dd):
                corpus, msg = corpus_add(corpus or [], items, inc, exc, dd)
                return corpus, msg, corpus_list(corpus)

            def do_add_from_scrape(corpus, obj, inc, exc, dd):
                corpus, msg = corpus_add(corpus or [], obj, inc, exc, dd)
                return corpus, msg, corpus_list(corpus)

            def do_add_from_crawl(corpus, pages, inc, exc, dd):
                corpus, msg = corpus_add(corpus or [], pages, inc, exc, dd)
                return corpus, msg, corpus_list(corpus)

            add_from_search.click(do_add_from_search, [corpus_state, last_search_obj, include_filter, exclude_filter, dedupe], [corpus_state, status_corpus, corpus_list_md])
            add_from_scrape.click(do_add_from_scrape, [corpus_state, last_scrape_obj, include_filter, exclude_filter, dedupe], [corpus_state, status_corpus, corpus_list_md])
            add_from_crawl.click(do_add_from_crawl, [corpus_state, last_crawl_pages, include_filter, exclude_filter, dedupe], [corpus_state, status_corpus, corpus_list_md])

            with gr.Row():
                merge_btn = gr.Button("Merge → Markdown", variant="primary")
                clear_btn = gr.Button("Clear Corpus", variant="secondary")
            merged_md = gr.Textbox(label="Merged Markdown (editable)", lines=12)

            def do_merge(corpus):
                md = corpus_merge_md(corpus or [])
                return md, md

            def do_clear():
                c, msg = corpus_clear()
                return c, msg, corpus_list(c), ""

            merge_btn.click(do_merge, [corpus_state], [merged_md, merged_md_state])
            clear_btn.click(do_clear, [], [corpus_state, status_corpus, corpus_list_md, merged_md])

            gr.Markdown("---")
            with gr.Row():
                site_name = gr.Textbox(label="Scaffold Name", value="zen-scan")
                scaffold_btn = gr.Button("Generate Minimal Site Scaffold (ZIP)")
            scaffold_zip = gr.File(visible=False)
            scaffold_btn.click(lambda md, name: scaffold_from_corpus(md, name or "zen-scan"),
                               [merged_md, site_name], [scaffold_zip])

            gr.Markdown("---")
            with gr.Row():
                export_zip_btn = gr.Button("Export Corpus (ZIP)")
            export_zip_file = gr.File(visible=False)

            def do_export(corpus, merged):
                extras = {"README.txt": "Exported by ZEN VibeCoder"}
                return corpus_export(corpus or [], merged or "", extras)

            export_zip_btn.click(do_export, [corpus_state, merged_md], [export_zip_file])

        with gr.Tab("✨ Vibe Code (Synthesis)"):
            with gr.Row():
                provider = gr.Radio(choices=["openai", "anthropic"], value="openai", label="Provider")
                model_name = gr.Textbox(label="Model (override)", placeholder="(blank = auto fallback)")
                temp = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature")
            sys_prompt = gr.Textbox(label="System Style (optional)",
                                    value="Return structured outputs with file trees, code blocks and ordered steps. Be concise and concrete.")
            user_prompt = gr.Textbox(label="User Prompt", lines=6)
            ctx_md = gr.Textbox(label="Context (paste markdown or click Merge first)", lines=10)
            gen_btn = gr.Button("Generate", variant="primary")
            out_md = gr.Markdown()
            gr.Markdown("**Starter Tiles**")
            with gr.Row():
                t1 = gr.Button("🧠 Clone Docs → Clean README")
                t2 = gr.Button("🧠 Competitor Matrix")
                t3 = gr.Button("🧪 Python API Client")
                t4 = gr.Button("🚀 ZEN Landing Rewrite")
                t5 = gr.Button("📊 Dataset & ETL Plan")

            def fill_tile(tile: str):
                tiles = {
                    "t1": "Create a clean knowledge pack from the context, then output a README.md with: Overview, Key features, Quickstart, API endpoints, Notes & gotchas, License. Include a /docs/ outline.",
                    "t2": "Produce a feature matrix, pricing table, ICP notes, moats/risks, and a market POV. End with a ZEN playbook: 5 lever moves.",
                    "t3": "Design a Python client that wraps the target API with retry/backoff and typed responses. Provide package layout, requirements, client.py, examples/, and README.",
                    "t4": "Rewrite the landing content in ZEN brand voice: headline, 3 value props, social proof, CTA, concise FAQ. Provide HTML sections and copy.",
                    "t5": "Propose a dataset schema. Output a table of fields, types, constraints, plus an ETL plan (sources, transforms, validation, freshness, monitoring).",
                }
                return tiles[tile]

            t1.click(lambda: fill_tile("t1"), outputs=[user_prompt])
            t2.click(lambda: fill_tile("t2"), outputs=[user_prompt])
            t3.click(lambda: fill_tile("t3"), outputs=[user_prompt])
            t4.click(lambda: fill_tile("t4"), outputs=[user_prompt])
            t5.click(lambda: fill_tile("t5"), outputs=[user_prompt])
            gen_btn.click(action_generate, [session_state, provider, model_name, sys_prompt, user_prompt, ctx_md, temp], [out_md])

        with gr.Tab("🧪 Dual Synth (OpenAI vs Anthropic)"):
            with gr.Row():
                model_openai = gr.Textbox(label="OpenAI Model", placeholder="(blank = auto fallback)")
                model_anthropic = gr.Textbox(label="Anthropic Model", placeholder="(blank = auto fallback)")
                temp2 = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature")
            sys2 = gr.Textbox(label="System Style (optional)", value="Return structured outputs with file trees and clear steps.")
            user2 = gr.Textbox(label="User Prompt", lines=6, value="Summarize the corpus and propose a 5-step execution plan.")
            ctx2 = gr.Textbox(label="Context (tip: click Merge in Corpus tab)", lines=10)
            dual_btn = gr.Button("Run Dual Synthesis", variant="primary")
            dual_md = gr.Markdown()
            dual_btn.click(dual_generate, [session_state, model_openai, model_anthropic, sys2, user2, ctx2, temp2], [dual_md])

    gr.Markdown("Built for **ZEN Arena** pipelines. Export ZIPs → ingest → credentialize via ZEN Cards.")


if __name__ == "__main__":
    demo.launch(ssr_mode=False)