Update app.py
app.py (CHANGED)

import os, io, json, zipfile, hashlib, tempfile
from typing import List, Dict, Any, Optional
import gradio as gr
from pydantic import BaseModel
from tenacity import retry, stop_after_attempt, wait_exponential, RetryError

# .env support (optional)
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass

# SDKs

# OpenAI (GPT-5)
try:
    from openai import OpenAI
except Exception:
    OpenAI = None

# Anthropic (Claude Sonnet 4.5 / latest)
try:
    import anthropic
    from anthropic import NotFoundError as AnthropicNotFound
except Exception:
    anthropic = None
    AnthropicNotFound = Exception  # fallback type

# Firecrawl SDK
from firecrawl import Firecrawl  # v2.x
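
# Assumed setup (a sketch; package names and version pins are not specified by this commit):
#   pip install gradio pydantic tenacity python-dotenv openai anthropic firecrawl-py
#   export OPENAI_API_KEY=... ANTHROPIC_API_KEY=... FIRECRAWL_API_KEY=...
#   python app.py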

# -------------------- utils --------------------
def _to_dict(obj: Any) -> Any:
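    """
    Recursively convert Firecrawl/Pydantic models (SearchData, ScrapeData, CrawlData, etc.)
    or other objects into plain Python dict/list primitives.
    """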
    # Pydantic v2 models
    if isinstance(obj, BaseModel):
        return obj.model_dump()
    # Mapping-like
    if isinstance(obj, dict):
        return {k: _to_dict(v) for k, v in obj.items()}
    # List/Tuple
    if isinstance(obj, (list, tuple)):
        return [_to_dict(v) for v in obj]
    # Objects with __dict__ (fallback)
    if hasattr(obj, "__dict__") and not isinstance(obj, (str, bytes)):
        try:
            return {k: _to_dict(v) for k, v in vars(obj).items()}

# ... (tail of _to_dict and head of _pretty_json(data: Any, limit: int = 300_000) collapsed in this view) ...

    except Exception as e:
        return f"<!> Could not serialize to JSON: {e}"

# normalize Firecrawl bucket payloads that may arrive as a single item or a list
def _listify(x) -> List[Any]:
    if x is None:
        return []
    if isinstance(x, list):
        return x
    return [x]

# -------------------- keys --------------------
class Keys(BaseModel):
    openai: Optional[str] = None
    anthropic: Optional[str] = None
    firecrawl: Optional[str] = None

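# Session-entered keys take precedence; environment variables are the fallback.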
def resolve_keys(s: Keys) -> Keys:
    return Keys(
        openai=s.openai or os.getenv("OPENAI_API_KEY"),
        anthropic=s.anthropic or os.getenv("ANTHROPIC_API_KEY"),
        firecrawl=s.firecrawl or os.getenv("FIRECRAWL_API_KEY"),
    )

# -------------------- firecrawl --------------------
def fc_client(s: Keys) -> Firecrawl:
    k = resolve_keys(s)
    if not k.firecrawl:
        raise gr.Error("Missing FIRECRAWL_API_KEY. Enter it in Keys → Save.")
    return Firecrawl(api_key=k.firecrawl)

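# tenacity retries transient Firecrawl failures with exponential backoff; when
# every attempt fails, the call raises tenacity.RetryError (handled in the actions).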
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=8))
def fc_search(s: Keys, query: str, limit: int = 5, scrape_formats: Optional[List[str]] = None, location: Optional[str] = None) -> Dict[str, Any]:
    fc = fc_client(s)
    kwargs: Dict[str, Any] = {"query": query, "limit": limit}
    if location:
        kwargs["location"] = location
    if scrape_formats:
        kwargs["scrape_options"] = {"formats": scrape_formats}
    res = fc.search(**kwargs)
    return _to_dict(res)

@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=1, max=10))
def fc_scrape(s: Keys, url: str, formats: Optional[List[str]] = None, timeout_ms: Optional[int] = None, mobile: bool = False) -> Dict[str, Any]:
    fc = fc_client(s)
    kwargs: Dict[str, Any] = {"url": url}
    if formats:
        kwargs["formats"] = formats
    # give slow pages more time; cap at 40s
    if timeout_ms:
        kwargs["timeout"] = min(int(timeout_ms), 40000)
    if mobile:
        kwargs["mobile"] = True
    res = fc.scrape(**kwargs)
    return _to_dict(res)

@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=1, max=10))
def fc_crawl(s: Keys, url: str, max_pages: int = 25, formats: Optional[List[str]] = None) -> Dict[str, Any]:
    fc = fc_client(s)
    kwargs: Dict[str, Any] = {"url": url, "limit": max_pages}
    if formats:
        kwargs["scrape_options"] = {"formats": formats}
    res = fc.crawl(**kwargs)
    return _to_dict(res)

# -------------------- LLMs --------------------
SYSTEM_STEER = (
    "You are ZEN's VibeCoder: extract web insights, generate clean scaffolds, "
    "and produce production-ready artifacts. Prefer structured outlines, code blocks, and checklists. "
    "When asked to clone or refactor, output file trees and exact text."
)

def use_openai(s: Keys):
    k = resolve_keys(s)
    if not k.openai:
        raise gr.Error("Missing OPENAI_API_KEY.")
    if OpenAI is None:
        raise gr.Error("OpenAI SDK not installed.")
    return OpenAI(api_key=k.openai)

def use_anthropic(s: Keys):
    k = resolve_keys(s)
    if not k.anthropic:
        raise gr.Error("Missing ANTHROPIC_API_KEY.")
    if anthropic is None:
        raise gr.Error("Anthropic SDK not installed.")
    return anthropic.Anthropic(api_key=k.anthropic)

ANTHROPIC_FALLBACKS = [
    # known-good Sonnet identifiers, newest first
    "claude-sonnet-4-5",
    "claude-3-7-sonnet-latest",
    "claude-3-5-sonnet-20241022",
    "claude-3-5-sonnet-20240620",
]

OPENAI_FALLBACKS = [
    "gpt-5",        # user-preferred
    "gpt-4.1",      # safe fallback
    "gpt-4o",       # vision-capable fallback
    "gpt-4o-mini",  # economical fallback
]

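# Try the explicit override first, then walk the fallback list; the first model
# that responds wins. Context is truncated to ~150k characters as a crude guard
# against overflowing a model's context window.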
def llm_summarize(s: Keys, provider: str, model_name: str, prompt: str, ctx_md: str, temp: float = 0.4) -> str:
    ctx = (ctx_md or "")[:150000]
    if provider == "openai":
        client = use_openai(s)
        candidates = ([model_name] + OPENAI_FALLBACKS) if model_name else OPENAI_FALLBACKS
        last_err = None
        for m in candidates:
            try:
                kw: Dict[str, Any] = {
                    "model": m,
                    "messages": [
                        {"role": "system", "content": SYSTEM_STEER},
                        {"role": "user", "content": f"{prompt}\n\n=== SOURCE (markdown) ===\n{ctx}"},
                    ],
                }
                # gpt-5-family chat models only accept the default temperature,
                # so pass an explicit value to other models only
                if not m.startswith("gpt-5"):
                    kw["temperature"] = temp
                resp = client.chat.completions.create(**kw)
                return (resp.choices[0].message.content or "").strip()
            except Exception as e:
                last_err = e
                continue
        raise gr.Error(f"OpenAI failed across fallbacks: {last_err}")
    else:
        client = use_anthropic(s)
        candidates = ([model_name] + ANTHROPIC_FALLBACKS) if model_name else ANTHROPIC_FALLBACKS
        last_err = None
        for m in candidates:
            try:
                resp = client.messages.create(
                    model=m,
                    max_tokens=4000,
                    temperature=temp,
                    system=SYSTEM_STEER,
                    messages=[{"role": "user", "content": f"{prompt}\n\n=== SOURCE (markdown) ===\n{ctx}"}],
                )
                chunks = []
                for blk in resp.content:
                    t = getattr(blk, "text", None)
                    if t:
                        chunks.append(t)
                return "".join(chunks).strip()
            except AnthropicNotFound as e:
                last_err = e
                continue
            except Exception as e:
                last_err = e
                continue
        raise gr.Error(f"Anthropic failed across fallbacks: {last_err}")

# -------------------- ZIP export --------------------
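# Builds an in-memory ZIP: one NNN_<slug>.md / NNN_<slug>.html per crawled page,
# plus a manifest.json listing each page's URL, title, and outbound links.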
def pack_zip(pages: List[Dict[str, Any]]) -> bytes:
    mem = io.BytesIO()
    with zipfile.ZipFile(mem, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
        # ... (per-page loop head collapsed in this view; it binds i, p, url, slug, and manifest) ...
            md = p.get("markdown") or p.get("data", {}).get("markdown") or p.get("content") or ""
            html = p.get("html") or p.get("data", {}).get("html") or ""
            links = p.get("links") or p.get("data", {}).get("links") or []
            if md:
                zf.writestr(f"{i:03d}_{slug}.md", md)
            if html:
                zf.writestr(f"{i:03d}_{slug}.html", html)
            manifest.append({"url": url, "title": p.get("title") or p.get("metadata", {}).get("title"), "links": links})
        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
    mem.seek(0)
    return mem.read()

# -------------------- actions --------------------
def save_keys(openai_key, anthropic_key, firecrawl_key):
    return Keys(
        openai=(openai_key or "").strip() or None,
        anthropic=(anthropic_key or "").strip() or None,
        firecrawl=(firecrawl_key or "").strip() or None,
    ), gr.Info("Keys saved to this session. (Env vars still apply if set.)")

def action_search(sess: Keys, query: str, limit: int, scrape_content: bool, location: str):
    if not query.strip():
        raise gr.Error("Enter a search query.")
    formats = ["markdown", "links"] if scrape_content else None
    res = fc_search(sess, query=query.strip(), limit=limit, scrape_formats=formats, location=(location or None))
    data = res.get("data", res)  # tolerate both response shapes
    items: List[Any] = []
    if isinstance(data, dict):
        for bucket in ("web", "news", "images", "videos", "discussion"):
            b = data.get(bucket)
            if b:
                items.extend(_listify(_to_dict(b)))
    elif isinstance(data, list):
        items = _to_dict(data)
    else:
        items = _listify(_to_dict(data))
    if not items:
        return _pretty_json(res)  # show the raw result if all buckets are empty
    return json.dumps(items, indent=2)

def action_scrape(sess: Keys, url: str, mobile: bool, formats_sel: List[str], timeout_ms: int):
    if not url.strip():
        raise gr.Error("Enter a URL.")
    formats = formats_sel or ["markdown", "links"]
    try:
        out = fc_scrape(sess, url.strip(), formats=formats, timeout_ms=(timeout_ms or 15000), mobile=mobile)
        pretty = _pretty_json(out)
        md = out.get("markdown") or out.get("data", {}).get("markdown") or out.get("content") or ""
        return pretty, md
    except RetryError as e:
        return f"<!> Scrape timed out after retries. Try increasing timeout, unchecking 'mobile', or limiting formats.\n\n{e}", ""
    except Exception as e:
        return f"<!> Scrape error: {e}", ""

def action_crawl(sess: Keys, base_url: str, max_pages: int, formats_sel: List[str]):
    if not base_url.strip():
        raise gr.Error("Enter a base URL to crawl.")
    formats = formats_sel or ["markdown", "links"]
    try:
        out = fc_crawl(sess, base_url.strip(), max_pages=max_pages, formats=formats)
        pages = out.get("data")
        if not isinstance(pages, list) or not pages:
            raise gr.Error("Crawl returned no pages.")
        # gr.File expects a path on disk (a BytesIO value and a filename kwarg are
        # not valid here), so write the archive to a temp file and point at it
        zip_path = os.path.join(tempfile.mkdtemp(), "site_clone.zip")
        with open(zip_path, "wb") as f:
            f.write(pack_zip(pages))
        return gr.update(value=zip_path, visible=True), f"Crawled {len(pages)} pages. ZIP is ready."
    except RetryError as e:
        return gr.update(visible=False), f"<!> Crawl timed out after retries. Reduce Max Pages or try again.\n\n{e}"
    except Exception as e:
        return gr.update(visible=False), f"<!> Crawl error: {e}"

def action_generate(sess: Keys, provider: str, model_name: str, sys_prompt: str, user_prompt: str, context_md: str, temp: float):
    if not user_prompt.strip():
        raise gr.Error("Enter a prompt or click a starter tile.")
    model = (model_name or "").strip()
    steer = (sys_prompt or "").strip()
    prompt = (("SYSTEM:\n" + steer + "\n\n") if steer else "") + user_prompt.strip()
    out = llm_summarize(sess, provider, model, prompt, context_md or "", temp=temp)
    return out

# -------------------- UI --------------------
with gr.Blocks(css="#keys .wrap.svelte-1ipelgc { filter: none !important; }") as demo:
    gr.Markdown("## ZEN VibeCoder – Web Clone & Research Foundry")
    session_state = gr.State(Keys())

    with gr.Accordion("🔑 Keys (session)", open=True):
        with gr.Row():
            openai_key = gr.Textbox(label="OPENAI_API_KEY (GPT-5 / fallbacks)", type="password", placeholder="sk-...", value=os.getenv("OPENAI_API_KEY") or "")
            anthropic_key = gr.Textbox(label="ANTHROPIC_API_KEY (Claude Sonnet)", type="password", placeholder="anthropic-key...", value=os.getenv("ANTHROPIC_API_KEY") or "")
            firecrawl_key = gr.Textbox(label="FIRECRAWL_API_KEY", type="password", placeholder="fc-...", value=os.getenv("FIRECRAWL_API_KEY") or "")
        save_btn = gr.Button("Save keys", variant="primary")
    # ... (save_btn wiring collapsed in this view) ...

    with gr.Tabs():
        with gr.Tab("🔍 Search"):
            query = gr.Textbox(label="Query", placeholder='ex: site:docs "vector database" 2025')
            with gr.Row():
                limit = gr.Slider(1, 20, value=6, step=1, label="Limit")
                scrape_content = gr.Checkbox(label="Also scrape results (markdown + links)", value=True)
            # ... (rest of the Search tab collapsed in this view) ...

        with gr.Tab("🕸️ Scrape • Crawl • Clone"):
            with gr.Row():
                target_url = gr.Textbox(label="URL to Scrape", placeholder="https://example.com")
                timeout_ms = gr.Number(label="Timeout (ms, max 40000)", value=15000)
            with gr.Row():
                formats_sel = gr.CheckboxGroup(choices=["markdown","html","links","screenshot"], value=["markdown","links"], label="Formats")
                mobile = gr.Checkbox(label="Emulate mobile", value=False)
            # ... (rest of the Scrape/Crawl tab collapsed in this view) ...

        with gr.Tab("✨ Vibe Code (Synthesis)"):
            with gr.Row():
                provider = gr.Radio(choices=["openai","anthropic"], value="openai", label="Provider")
                model_name = gr.Textbox(label="Model (override)", placeholder="(blank = auto fallback)")
                temp = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature")

            sys_prompt = gr.Textbox(label="System Style (optional)",
                # ... (rest of the Textbox call and the Vibe Code tab collapsed in this view) ...

    gr.Markdown("Built for **ZEN Arena** pipelines. Export ZIPs → ingest → credentialize via ZEN Cards.")

if __name__ == "__main__":
    demo.launch(ssr_mode=False)