Spaces:
Running
Running
DJAKOUA KWANKAM BRAYAN STEVE
fix: update Groq model list (llama3-70b deprecated -> llama-3.3-70b-versatile)
e228496 | import os | |
| import json | |
| import time | |
| import uuid | |
| import requests | |
| from datetime import datetime | |
| from io import BytesIO | |
| from fastapi import FastAPI, Request | |
| from fastapi.responses import JSONResponse, HTMLResponse | |
| import gradio as gr | |
| from huggingface_hub import HfApi, create_repo | |
# --- Credentials and shared clients -------------------------------------
# HF_TOKEN is used both for the Hub client below and as the bearer token
# sent by infer_hf(); it is None when the variable is not set.
token = os.getenv("HF_TOKEN")
api = HfApi(token=token)

# FastAPI application object.
app = FastAPI()

# --- Inference provider selection ---------------------------------------
# Provider tried first by the chat handler ("hf-inference" unless overridden).
PROVIDER = os.getenv("INFERENCE_PROVIDER", "hf-inference")
# Empty string means Groq is not configured.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")

# Model identifiers per provider; the first entry of each list is the
# default when the client does not request a specific model.
FREE_MODELS_HF = ["Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-32B-Instruct"]
# NOTE(review): availability of these Groq model ids changes over time —
# confirm against the provider's current model list.
FREE_MODELS_GROQ = [
    "llama-3.3-70b-versatile",
    "llama-3.1-70b-versatile",
    "llama-3.1-8b-instant",
    "mixtral-8x7b-32768",
]
def infer_hf(messages, model, temperature=0.7, top_p=0.9, max_tokens=2048, tools=None):
    """Run a chat completion through the Hugging Face router.

    Args:
        messages: Chat messages forwarded as the ``messages`` field.
        model: Model identifier forwarded as the ``model`` field.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.
        max_tokens: Completion length limit.
        tools: Optional tool definitions; only sent when truthy.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        Exception: if ``HF_TOKEN`` is not configured, or on any non-200
            status. Statuses 429, 402 and 503 get a distinct
            "rate/credit limit" message.
    """
    if not token:
        raise Exception("HF_TOKEN non configuré")

    request_body = dict(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    if tools:
        request_body["tools"] = tools

    reply = requests.post(
        "https://router.huggingface.co/v1/chat/completions",
        headers={"Authorization": f"Bearer {token}"},
        json=request_body,
        timeout=120,
    )

    status = reply.status_code
    if status == 200:
        return reply.json()

    # Only the first 200 characters of the error body are kept in the message.
    excerpt = reply.text[:200]
    if status in (429, 402, 503):
        raise Exception(f"HF rate/credit limit: {status} - {excerpt}")
    raise Exception(f"HF HTTP {status}: {excerpt}")
def infer_groq(messages, model, temperature=0.7, top_p=0.9, max_tokens=2048, tools=None):
    """Run a chat completion through Groq's OpenAI-compatible endpoint.

    Args:
        messages: Chat messages forwarded as the ``messages`` field.
        model: Model identifier forwarded as the ``model`` field.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.
        max_tokens: Completion length limit.
        tools: Optional tool definitions; only sent when truthy.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        Exception: if ``GROQ_API_KEY`` is empty, or on any non-200 status
            (the message carries the status code and the first 200
            characters of the response body).
    """
    if not GROQ_API_KEY:
        raise Exception("GROQ_API_KEY non configuré")

    request_body = dict(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    if tools:
        request_body["tools"] = tools

    reply = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json",
        },
        json=request_body,
        timeout=120,
    )

    if reply.status_code == 200:
        return reply.json()
    raise Exception(f"Groq HTTP {reply.status_code}: {reply.text[:200]}")
def search_web(query):
    """Search the web via DuckDuckGo and return the hits as plain text.

    Up to four results are requested. Each hit is rendered as a
    title / summary / link triplet and hits are separated by a blank line.
    This function never raises: any failure (including the search package
    being unavailable) is returned as an error string instead.
    """
    try:
        # Imported lazily so a missing package is reported as a search error.
        from duckduckgo_search import DDGS

        hits = list(DDGS().text(query, max_results=4))
        if not hits:
            return "Aucun résultat trouvé."

        rendered = []
        for hit in hits:
            rendered.append(
                f"Titre: {hit['title']}\nRésumé: {hit['body']}\nLien: {hit['href']}"
            )
        return "\n\n".join(rendered)
    except Exception as exc:
        return f"Erreur recherche: {exc}"
def save_log(username, message, response):
    """Upload one chat exchange as a JSON file to a private dataset repo.

    The repo is ``<token owner>/cypher-coder-logs`` and the file is stored
    under ``logs/<username>/<UTC timestamp>_<random suffix>.json``.

    Logging is best effort: nothing happens when no HF token is configured,
    and any failure is printed rather than raised.

    Args:
        username: Name to file the log under. It is stored verbatim inside
            the JSON entry but sanitised before being used as a directory
            name, because it can originate from the request body.
        message: The user's message.
        response: The assistant's reply.
    """
    if not token:
        return
    try:
        user = api.whoami()["name"]
        repo_id = f"{user}/cypher-coder-logs"
        try:
            create_repo(repo_id, token=token, repo_type="dataset", private=True, exist_ok=True)
        except Exception as e:
            # Still attempt the upload (the repo may already be usable), but
            # do not hide why creation failed.
            print(f"Erreur création dépôt logs: {e}")

        # Take the timestamp once so the entry and its filename always agree.
        now = datetime.utcnow()
        log_entry = {
            "username": username,
            "timestamp": now.isoformat(),
            "message": message,
            "response": response,
        }
        folder = _safe_log_dirname(username)
        file_path = f"logs/{folder}/{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}.json"
        api.upload_file(
            path_or_fileobj=BytesIO(json.dumps(log_entry, ensure_ascii=False, indent=2).encode("utf-8")),
            path_in_repo=file_path,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
    except Exception as e:
        print(f"Erreur log: {e}")


def _safe_log_dirname(username):
    """Turn an arbitrary username into a single safe path segment.

    Letters, digits, ``-``, ``_`` and ``.`` are kept; every other character
    (notably ``/`` and ``\\``) becomes ``_``. Leading and trailing dots are
    stripped so the result can never be ``.`` or ``..``. An empty result
    falls back to ``"anonymous"``.
    """
    text = "" if username is None else str(username)
    cleaned = "".join(ch if (ch.isalnum() or ch in "-_.") else "_" for ch in text)
    cleaned = cleaned.strip(".")
    return cleaned or "anonymous"
async def root(request: Request):
    """Serve ``index.html`` from the working directory.

    Returns the file content with status 200, or a 500 response containing
    the error text when anything goes wrong.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view — confirm how it is registered on the app.
    try:
        with open("index.html", encoding="utf-8") as page:
            html = page.read()
        return HTMLResponse(content=html, status_code=200)
    except Exception as exc:
        return HTMLResponse(content=f"Error: {exc}", status_code=500)
async def user_profile(request: Request):
    """Return the caller's profile as read from ``x-hf-user-*`` headers.

    Missing headers fall back to ``"invité"`` for the username and to an
    empty string for the avatar and the email.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view — confirm how it is registered on the app.
    incoming = request.headers
    # (response key, header name, fallback value)
    fields = (
        ("username", "x-hf-user-name", "invité"),
        ("avatar", "x-hf-user-avatar", ""),
        ("email", "x-hf-user-email", ""),
    )
    return {key: incoming.get(header, fallback) for key, header, fallback in fields}
async def chat(request: Request):
    """Handle one chat turn and return the assistant message as JSON.

    The JSON body may contain ``messages``, ``tools``, ``username``,
    ``model``, ``temperature``, ``top_p`` and ``max_tokens``. A system
    prompt is inserted when the conversation has none, and a server-side
    ``search_web`` tool is always offered alongside the client's tools.

    Flow:
      * Pick the provider from ``PROVIDER``. Groq without an API key falls
        back to ``hf-inference``; a missing model defaults to the first
        entry of the provider's model list.
      * On an inference error: retry once without tools when the error
        looks tool-related, switch from HF to Groq when a Groq key is
        configured, otherwise propagate the error.
      * If the model calls ``search_web``, run the search here, append the
        results to the conversation and ask the model again. Any other tool
        called in the same reply gets a placeholder result.
      * If the model calls only client-side tools, return the tool calls to
        the client.
      * Otherwise return the plain answer.

    The number of server-side search rounds is capped. Once the cap is
    reached, tools are no longer offered so the model has to answer; if it
    still asks for a search, the request fails instead of looping forever.

    Returns:
        ``{"message": {...}}`` on success, or ``{"error": "..."}`` with
        status 500 on any failure.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view — confirm how it is registered on the app.
    # NOTE(review): infer_hf / infer_groq / save_log are synchronous calls
    # made from this coroutine; consider off-loading them to a worker thread.
    max_search_rounds = 5
    try:
        body = await request.json()
        # "or []" also covers an explicit JSON null for these fields.
        messages = body.get("messages") or []
        client_tools = body.get("tools") or []
        username = body.get("username", "local-user")
        requested_model = body.get("model", "")
        temperature = body.get("temperature", 0.7)
        top_p = body.get("top_p", 0.9)
        max_tokens = body.get("max_tokens", 2048)

        if not any(m.get("role") == "system" for m in messages):
            messages.insert(0, {"role": "system", "content": SYSTEM_PROMPT})

        all_tools = list(client_tools)
        all_tools.append(_search_web_tool_spec())
        use_tools = True
        search_rounds = 0

        current_provider = PROVIDER
        current_model = requested_model
        if current_provider == "groq":
            if not GROQ_API_KEY:
                current_provider = "hf-inference"
            elif not current_model:
                current_model = FREE_MODELS_GROQ[0]
        if current_provider == "hf-inference" and not current_model:
            current_model = FREE_MODELS_HF[0]

        while True:
            infer = infer_groq if current_provider == "groq" else infer_hf
            try:
                response = infer(messages, current_model, temperature, top_p, max_tokens,
                                 all_tools if use_tools else None)
            except Exception as e:
                err_msg = str(e)
                lowered = err_msg.lower()
                # Some models/providers reject the tools payload: retry without it.
                if use_tools and ("422" in err_msg or "tools" in lowered):
                    use_tools = False
                    continue
                quota_error = ("rate" in lowered or "402" in err_msg
                               or "429" in err_msg or "credit" in lowered)
                if quota_error:
                    if current_provider == "groq":
                        raise Exception(f"Groq: {err_msg}") from e
                    if current_provider == "hf-inference":
                        if GROQ_API_KEY:
                            current_provider = "groq"
                            current_model = FREE_MODELS_GROQ[0]
                            continue
                        raise Exception("Crédits HF épuisés et pas de fallback Groq configuré") from e
                # Any other HF failure: try Groq once when it is configured.
                if current_provider == "hf-inference" and GROQ_API_KEY:
                    current_provider = "groq"
                    current_model = FREE_MODELS_GROQ[0]
                    continue
                raise Exception(err_msg) from e

            reply = response["choices"][0]["message"]
            tool_calls = reply.get("tool_calls")

            if not tool_calls:
                # Plain answer.
                data = {"role": reply["role"], "content": reply["content"]}
                save_log(username, _last_user_content(messages), reply["content"] or "")
                return JSONResponse(content={"message": data})

            if not any(tc["function"]["name"] == "search_web" for tc in tool_calls):
                # Only client-side tools were requested: hand them back to the client.
                data = {
                    "role": reply["role"],
                    "content": reply["content"],
                    "tool_calls": _copy_tool_calls(tool_calls),
                }
                save_log(username, _last_user_content(messages), reply["content"] or "[Outils locaux]")
                return JSONResponse(content={"message": data})

            # The model asked for a web search: run it here and loop again.
            if search_rounds >= max_search_rounds:
                raise Exception("Limite d'appels à search_web atteinte sans réponse finale")
            messages.append({"role": "assistant", "tool_calls": _copy_tool_calls(tool_calls)})
            _append_tool_results(messages, tool_calls)
            search_rounds += 1
            if search_rounds >= max_search_rounds:
                # Stop offering tools so the next reply has to be an answer.
                use_tools = False
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)


def _search_web_tool_spec():
    """Return the function-tool definition advertised for ``search_web``."""
    return {
        "type": "function",
        "function": {
            "name": "search_web",
            "description": "Recherche des informations actualisées ou de la documentation technique sur internet.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "La requête de recherche."}
                },
                "required": ["query"],
            },
        },
    }


def _copy_tool_calls(tool_calls):
    """Keep only the id, type and function name/arguments of each tool call."""
    return [
        {
            "id": tc["id"],
            "type": tc["type"],
            "function": {
                "name": tc["function"]["name"],
                "arguments": tc["function"]["arguments"],
            },
        }
        for tc in tool_calls
    ]


def _last_user_content(messages):
    """Return the content of the most recent ``user`` message, or ``""``."""
    return next((m.get("content", "") for m in reversed(messages) if m.get("role") == "user"), "")


def _append_tool_results(messages, tool_calls):
    """Append one ``tool`` message per call.

    ``search_web`` calls are executed; every other tool gets a placeholder
    result because it is meant to run on the client.
    """
    for tc in tool_calls:
        name = tc["function"]["name"]
        if name != "search_web":
            messages.append({"role": "tool", "name": name,
                             "tool_call_id": tc["id"], "content": "En attente exécution locale..."})
            continue
        try:
            args = json.loads(tc["function"]["arguments"])
            content = search_web(args.get("query", ""))
        except Exception as e:
            # Malformed arguments are reported to the model as the tool result.
            content = f"Erreur: {str(e)}"
        messages.append({"role": "tool", "name": "search_web",
                         "tool_call_id": tc["id"], "content": content})
# Default system prompt, inserted by the chat handler when the conversation
# has no system message.
# NOTE(review): line breaks are reproduced as they appear in this view —
# confirm no blank lines are expected between the paragraphs.
SYSTEM_PROMPT = (
    "Tu es Cypher Coder, un agent de programmation IA. Tu as été créé par DJAKOUA KWANKAM.\n"
    "[RÈGLE : SEARCH-BEFORE-CODE]\n"
    "- Avant de générer du code ou répondre, cherche sur le web (outil search_web) pour obtenir des infos à jour.\n"
    "- Ne te base jamais uniquement sur ta mémoire interne.\n"
    "Tu as accès à des outils locaux (lire/écrire fichiers, exécuter commandes). Sois concis. Formate en Markdown."
)
# --- Gradio UI -------------------------------------------------------------
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="cyan",
    neutral_hue="slate",
)
# Hides the Gradio footer.
css = "footer {visibility: hidden}"

with gr.Blocks(theme=theme, css=css) as demo:
    gr.HTML("<h1>💻 Cypher Coder</h1><p>Agent CLI autonome - IUT de Douala</p>")

# Mount the Gradio UI under /gradio on the FastAPI app.
app = gr.mount_gradio_app(app, demo, path="/gradio")

if __name__ == "__main__":
    # Imported here so uvicorn is only required when run as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)