cypher-coder / app.py
DJAKOUA KWANKAM BRAYAN STEVE
fix: update Groq model list (llama3-70b deprecated -> llama-3.3-70b-versatile)
e228496
Raw
History Blame Contribute Delete
11.1 kB
import os
import json
import time
import uuid
import requests
from datetime import datetime
from io import BytesIO
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, HTMLResponse
import gradio as gr
from huggingface_hub import HfApi, create_repo
# Hugging Face access token; used both for inference calls and for the log dataset.
# When HF_TOKEN is unset this is None: infer_hf raises and save_log becomes a no-op.
token = os.environ.get("HF_TOKEN")
# Shared Hub client (used by save_log for whoami / upload_file).
api = HfApi(token=token)
# ASGI application; routes are registered on it below and it is later re-bound
# to the result of gr.mount_gradio_app.
app = FastAPI()
# Preferred inference backend. chat() treats "groq" specially and sends every
# other value through infer_hf.
PROVIDER = os.environ.get("INFERENCE_PROVIDER", "hf-inference")
# Groq API key; an empty string means Groq (and the Groq fallback) is disabled.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# Candidate Hugging Face models. Only the first entry is used in this file
# (as the default when the client does not request a model).
FREE_MODELS_HF = [
"Qwen/Qwen2.5-72B-Instruct",
"Qwen/Qwen2.5-32B-Instruct",
]
# Candidate Groq models. Only the first entry is used in this file
# (default model and fallback target).
# NOTE(review): "llama-3.1-70b-versatile" and "mixtral-8x7b-32768" may have been
# retired by Groq — verify against Groq's current model list before relying on them.
FREE_MODELS_GROQ = [
"llama-3.3-70b-versatile",
"llama-3.1-70b-versatile",
"llama-3.1-8b-instant",
"mixtral-8x7b-32768",
]
def infer_hf(messages, model, temperature=0.7, top_p=0.9, max_tokens=2048, tools=None):
    """Run one chat completion through the Hugging Face router.

    Args:
        messages: Chat messages in OpenAI chat-completions format.
        model: Model identifier sent in the request body.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.
        max_tokens: Maximum number of tokens to generate.
        tools: Optional list of tool definitions; only sent when truthy.

    Returns:
        The decoded JSON body of the HTTP 200 response.

    Raises:
        Exception: If HF_TOKEN is not configured, or the router answers with
            any status other than 200. Statuses 429/402/503 get a dedicated
            "rate/credit limit" message that the caller matches on.
    """
    if not token:
        raise Exception("HF_TOKEN non configuré")

    request_body = dict(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    if tools:
        request_body["tools"] = tools

    reply = requests.post(
        "https://router.huggingface.co/v1/chat/completions",
        headers={"Authorization": f"Bearer {token}"},
        json=request_body,
        timeout=120,
    )

    status = reply.status_code
    if status == 200:
        return reply.json()

    # Only the first 200 characters of the error body are surfaced.
    snippet = reply.text[:200]
    if status in (429, 402, 503):
        raise Exception(f"HF rate/credit limit: {status} - {snippet}")
    raise Exception(f"HF HTTP {status}: {snippet}")
def infer_groq(messages, model, temperature=0.7, top_p=0.9, max_tokens=2048, tools=None):
    """Run one chat completion through Groq's OpenAI-compatible endpoint.

    Args:
        messages: Chat messages in OpenAI chat-completions format.
        model: Groq model identifier.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.
        max_tokens: Maximum number of tokens to generate.
        tools: Optional list of tool definitions; only sent when truthy.

    Returns:
        The decoded JSON body of the HTTP 200 response.

    Raises:
        Exception: If GROQ_API_KEY is not configured, or Groq answers with
            any status other than 200.
    """
    if not GROQ_API_KEY:
        raise Exception("GROQ_API_KEY non configuré")

    base_fields = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    # The "tools" key is added only when the caller supplied a non-empty list.
    request_body = {**base_fields, "tools": tools} if tools else base_fields

    reply = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json",
        },
        json=request_body,
        timeout=120,
    )

    if reply.status_code == 200:
        return reply.json()
    raise Exception(f"Groq HTTP {reply.status_code}: {reply.text[:200]}")
def search_web(query):
    """Search the web through DuckDuckGo and return a text digest of the hits.

    Args:
        query: Search string. Empty, whitespace-only or non-string values are
            rejected up front without calling the search backend.

    Returns:
        A string: up to four results formatted as title / summary / link
        blocks separated by blank lines, "Aucun résultat trouvé." when the
        search yields nothing, or a message starting with "Erreur recherche:"
        when the query is unusable or the search fails. This function never
        raises; the text is fed back to the model as a tool result.
    """
    # The query comes from model-generated tool arguments, so it can be
    # missing or of the wrong type; do not forward that to the backend.
    if not isinstance(query, str) or not query.strip():
        return "Erreur recherche: requête vide."
    try:
        # Imported lazily so a missing optional dependency is reported as a
        # search error instead of breaking module import.
        from duckduckgo_search import DDGS

        results = list(DDGS().text(query, max_results=4))
        if not results:
            return "Aucun résultat trouvé."
        # .get() keeps the remaining hits usable when one entry lacks a field.
        formatted = [
            f"Titre: {r.get('title', '')}\n"
            f"Résumé: {r.get('body', '')}\n"
            f"Lien: {r.get('href', '')}"
            for r in results
        ]
        return "\n\n".join(formatted)
    except Exception as e:
        return f"Erreur recherche: {str(e)}"
def save_log(username, message, response):
    """Best-effort upload of one chat exchange to a private Hub dataset.

    The entry is written as a JSON file under ``logs/<username>/`` in the
    ``<token owner>/cypher-coder-logs`` dataset repository.

    Args:
        username: Name supplied by the client; stored verbatim in the log
            entry but sanitised before being used as a path segment.
        message: The user message being logged.
        response: The assistant response being logged.

    Returns:
        None. Does nothing when HF_TOKEN is not configured. Failures are
        printed and never raised, so logging cannot break a chat request.
    """
    # Local imports: these names are not part of the file-level import block.
    import re
    from datetime import timezone

    if not token:
        return
    try:
        owner = api.whoami()["name"]
        repo_id = f"{owner}/cypher-coder-logs"
        try:
            create_repo(repo_id, token=token, repo_type="dataset", private=True, exist_ok=True)
        except Exception as e:
            # Still attempt the upload (the repo may already exist), but do
            # not hide why creation failed.
            print(f"Erreur création dépôt logs: {e}")

        # One timestamp for both the payload and the filename so they always
        # agree. Kept naive-UTC to preserve the existing log format while
        # avoiding the deprecated datetime.utcnow().
        now = datetime.now(timezone.utc).replace(tzinfo=None)

        # username is client-controlled: collapse anything that is not a word
        # character, dot or dash so it cannot inject "/" or ".." segments
        # into the repository path.
        safe_user = re.sub(r"[^\w.-]+", "_", str(username or "")).strip("._") or "inconnu"

        log_entry = {
            "username": username,
            "timestamp": now.isoformat(),
            "message": message,
            "response": response,
        }
        file_path = f"logs/{safe_user}/{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}.json"
        api.upload_file(
            path_or_fileobj=BytesIO(json.dumps(log_entry, ensure_ascii=False, indent=2).encode("utf-8")),
            path_in_repo=file_path,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
    except Exception as e:
        print(f"Erreur log: {e}")
@app.get("/")
async def root(request: Request):
    """Serve the contents of ``index.html`` as the landing page.

    The file is opened relative to the process working directory. Any
    failure is answered with a 500 page carrying the error text.
    """
    try:
        with open("index.html", "r", encoding="utf-8") as page:
            html = page.read()
        return HTMLResponse(content=html, status_code=200)
    except Exception as exc:
        return HTMLResponse(content=f"Error: {str(exc)}", status_code=500)
@app.get("/api/user-profile")
async def user_profile(request: Request):
    """Return the caller's profile as read from the ``x-hf-user-*`` headers.

    Missing headers fall back to "invité" for the username and to an empty
    string for the avatar and e-mail.
    """
    # (response key, request header, fallback value)
    header_map = (
        ("username", "x-hf-user-name", "invité"),
        ("avatar", "x-hf-user-avatar", ""),
        ("email", "x-hf-user-email", ""),
    )
    incoming = request.headers
    return {key: incoming.get(header, fallback) for key, header, fallback in header_map}
# Upper bound on server-side search_web rounds per request, so a model that
# keeps asking for searches cannot keep the handler looping forever.
_MAX_SEARCH_ROUNDS = 5

# Tool definition appended to whatever tools the client sent.
_SEARCH_WEB_TOOL = {
    "type": "function",
    "function": {
        "name": "search_web",
        "description": "Recherche des informations actualisées ou de la documentation technique sur internet.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "La requête de recherche."}
            },
            "required": ["query"],
        },
    },
}


def _serialize_tool_calls(tool_calls):
    """Reduce provider tool calls to the id / type / function fields that are echoed back."""
    return [
        {
            "id": tc["id"],
            # Default the type when a provider omits it instead of failing.
            "type": tc.get("type", "function"),
            "function": {
                "name": tc["function"]["name"],
                "arguments": tc["function"]["arguments"],
            },
        }
        for tc in tool_calls
    ]


def _last_user_content(messages):
    """Return the content of the most recent user message, or "" when there is none."""
    return next(
        (m.get("content", "") for m in reversed(messages) if m.get("role") == "user"),
        "",
    )


@app.post("/api/chat")
async def chat(request: Request):
    """Handle one chat turn: run inference, resolve web searches, return the reply.

    Request JSON fields (all optional): ``messages``, ``tools``, ``username``,
    ``model``, ``temperature``, ``top_p``, ``max_tokens``.

    Behaviour:
        * A system prompt is prepended when the client sent none.
        * The ``search_web`` tool is always offered in addition to client tools.
        * ``search_web`` calls are executed here and the conversation is
          re-submitted, at most ``_MAX_SEARCH_ROUNDS`` times per request.
        * Tool calls that do not include ``search_web`` are returned to the
          client for local execution.
        * If the provider rejects tools (422 / "tools" in the error) the call
          is retried once without tools; if Hugging Face fails and a Groq key
          is configured, the request falls back to Groq.

    Returns:
        ``JSONResponse`` with ``{"message": {...}}`` on success, or
        ``{"error": "..."}`` with status 500 on any failure.
    """
    # Imported locally so this block does not rely on a new file-level import.
    from fastapi.concurrency import run_in_threadpool

    try:
        body = await request.json()
        # "or" also covers explicit nulls sent by the client.
        messages = body.get("messages") or []
        client_tools = body.get("tools") or []
        username = body.get("username", "local-user")
        requested_model = body.get("model", "")
        temperature = body.get("temperature", 0.7)
        top_p = body.get("top_p", 0.9)
        max_tokens = body.get("max_tokens", 2048)

        if not any(m.get("role") == "system" for m in messages):
            messages.insert(0, {"role": "system", "content": SYSTEM_PROMPT})

        all_tools = list(client_tools)
        all_tools.append(_SEARCH_WEB_TOOL)
        use_tools = True

        current_provider = PROVIDER
        if current_provider == "groq" and not GROQ_API_KEY:
            current_provider = "hf-inference"
        current_model = requested_model
        if not current_model:
            # Every non-Groq provider is served by infer_hf, so it gets the
            # Hugging Face default instead of an empty model name.
            current_model = FREE_MODELS_GROQ[0] if current_provider == "groq" else FREE_MODELS_HF[0]

        search_rounds = 0
        while True:
            infer = infer_groq if current_provider == "groq" else infer_hf
            try:
                # The inference helpers use blocking HTTP; run them off the
                # event loop so other requests are not stalled.
                response = await run_in_threadpool(
                    infer,
                    messages,
                    current_model,
                    temperature,
                    top_p,
                    max_tokens,
                    all_tools if use_tools else None,
                )
            except Exception as e:
                err_msg = str(e)
                lowered = err_msg.lower()
                # Provider rejected the tools payload: retry once without it.
                if use_tools and ("422" in err_msg or "tools" in lowered):
                    use_tools = False
                    continue
                quota_error = (
                    "rate" in lowered
                    or "402" in err_msg
                    or "429" in err_msg
                    or "credit" in lowered
                )
                if quota_error and current_provider == "groq":
                    raise Exception(f"Groq: {err_msg}") from e
                if current_provider == "hf-inference":
                    if GROQ_API_KEY:
                        # Any Hugging Face failure falls back to Groq once.
                        current_provider = "groq"
                        current_model = FREE_MODELS_GROQ[0]
                        continue
                    if quota_error:
                        raise Exception("Crédits HF épuisés et pas de fallback Groq configuré") from e
                raise Exception(err_msg) from e

            reply = response["choices"][0]["message"]
            role = reply.get("role", "assistant")
            content = reply.get("content")
            tool_calls = reply.get("tool_calls")

            if not tool_calls:
                # Plain answer: log it and hand it back.
                await run_in_threadpool(
                    save_log, username, _last_user_content(messages), content or ""
                )
                return JSONResponse(content={"message": {"role": role, "content": content}})

            echoed_calls = _serialize_tool_calls(tool_calls)

            if not any(tc["function"]["name"] == "search_web" for tc in tool_calls):
                # Only client-side tools were requested: return them for local execution.
                data = {"role": role, "content": content, "tool_calls": echoed_calls}
                await run_in_threadpool(
                    save_log, username, _last_user_content(messages), content or "[Outils locaux]"
                )
                return JSONResponse(content={"message": data})

            if search_rounds >= _MAX_SEARCH_ROUNDS:
                raise Exception("Limite d'appels search_web atteinte")
            search_rounds += 1
            if search_rounds == _MAX_SEARCH_ROUNDS:
                # Last allowed round: stop offering tools so the next
                # completion has to be a final answer.
                use_tools = False

            messages.append({"role": "assistant", "tool_calls": echoed_calls})
            for tc in tool_calls:
                tool_name = tc["function"]["name"]
                if tool_name == "search_web":
                    try:
                        args = json.loads(tc["function"]["arguments"])
                        tool_output = await run_in_threadpool(search_web, args.get("query", ""))
                    except Exception as e:
                        tool_output = f"Erreur: {str(e)}"
                else:
                    # Local tools cannot run on the server; answer with a
                    # placeholder so every tool call has a matching result.
                    tool_output = "En attente exécution locale..."
                messages.append({
                    "role": "tool",
                    "name": tool_name,
                    "tool_call_id": tc["id"],
                    "content": tool_output,
                })
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)
# Default system prompt. chat() inserts it as the first message whenever the
# client's message list contains no "system" role. The text is sent to the
# model verbatim, so it is runtime data rather than documentation.
SYSTEM_PROMPT = """Tu es Cypher Coder, un agent de programmation IA. Tu as été créé par DJAKOUA KWANKAM.
[RÈGLE : SEARCH-BEFORE-CODE]
- Avant de générer du code ou répondre, cherche sur le web (outil search_web) pour obtenir des infos à jour.
- Ne te base jamais uniquement sur ta mémoire interne.
Tu as accès à des outils locaux (lire/écrire fichiers, exécuter commandes). Sois concis. Formate en Markdown."""
# Minimal Gradio page, mounted next to the FastAPI routes under /gradio.
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="cyan",
    neutral_hue="slate",
)
# Hides the default Gradio footer.
css = "footer {visibility: hidden}"

with gr.Blocks(theme=theme, css=css) as demo:
    gr.HTML("<h1>💻 Cypher Coder</h1><p>Agent CLI autonome - IUT de Douala</p>")

# Re-bind ``app`` to the application returned by Gradio's mount helper.
app = gr.mount_gradio_app(app, demo, path="/gradio")

if __name__ == "__main__":
    # Local development entry point: serve on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)