Spaces:

TheShellMaster
/

cypher-coder

Running

DJAKOUA KWANKAM BRAYAN STEVE

fix: update Groq model list (llama3-70b deprecated -> llama-3.3-70b-versatile)

e228496 5 days ago

11.1 kB

	import os
	import json
	import time
	import uuid
	import requests
	from datetime import datetime
	from io import BytesIO
	from fastapi import FastAPI, Request
	from fastapi.responses import JSONResponse, HTMLResponse
	import gradio as gr
	from huggingface_hub import HfApi, create_repo

	# Hugging Face credentials and the shared clients built from them.
	# `token` is None when HF_TOKEN is not set in the environment.
	token = os.getenv("HF_TOKEN")
	api = HfApi(token=token)
	app = FastAPI()

	# Inference backend selection; the Groq key defaults to an empty string.
	PROVIDER = os.getenv("INFERENCE_PROVIDER", "hf-inference")
	GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")

	# Model identifiers sent to the Hugging Face router endpoint.
	FREE_MODELS_HF = ["Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-32B-Instruct"]

	# Model identifiers sent to the Groq endpoint.
	# NOTE(review): the code visible in this file only ever reads index 0;
	# verify the remaining IDs against Groq's current model/deprecation list.
	FREE_MODELS_GROQ = [
	    "llama-3.3-70b-versatile",
	    "llama-3.1-70b-versatile",
	    "llama-3.1-8b-instant",
	    "mixtral-8x7b-32768",
	]

	def infer_hf(messages, model, temperature=0.7, top_p=0.9, max_tokens=2048, tools=None):
	    """Send a chat-completion request to the Hugging Face router.

	    Args:
	        messages: Chat messages forwarded as the ``messages`` field.
	        model: Model identifier forwarded as the ``model`` field.
	        temperature: Sampling temperature forwarded as-is.
	        top_p: Nucleus-sampling value forwarded as-is.
	        max_tokens: Completion length limit forwarded as-is.
	        tools: Optional tool definitions; only sent when truthy.

	    Returns:
	        The decoded JSON body of a 200 response.

	    Raises:
	        Exception: If ``HF_TOKEN`` is missing, or the endpoint answers with
	            anything other than HTTP 200 (429/402/503 get a dedicated
	            "rate/credit limit" message).
	    """
	    if not token:
	        raise Exception("HF_TOKEN non configuré")

	    request_body = dict(
	        model=model,
	        messages=messages,
	        max_tokens=max_tokens,
	        temperature=temperature,
	        top_p=top_p,
	    )
	    if tools:
	        request_body["tools"] = tools

	    reply = requests.post(
	        "https://router.huggingface.co/v1/chat/completions",
	        headers={"Authorization": f"Bearer {token}"},
	        json=request_body,
	        timeout=120,
	    )
	    status = reply.status_code
	    if status == 200:
	        return reply.json()

	    # Only the first 200 characters of the error body are kept in the message.
	    snippet = reply.text[:200]
	    if status in (429, 402, 503):
	        raise Exception(f"HF rate/credit limit: {status} - {snippet}")
	    raise Exception(f"HF HTTP {status}: {snippet}")

	def infer_groq(messages, model, temperature=0.7, top_p=0.9, max_tokens=2048, tools=None):
	    """Send a chat-completion request to Groq's OpenAI-compatible endpoint.

	    Args:
	        messages: Chat messages forwarded as the ``messages`` field.
	        model: Model identifier forwarded as the ``model`` field.
	        temperature: Sampling temperature forwarded as-is.
	        top_p: Nucleus-sampling value forwarded as-is.
	        max_tokens: Completion length limit forwarded as-is.
	        tools: Optional tool definitions; only sent when truthy.

	    Returns:
	        The decoded JSON body of a 200 response.

	    Raises:
	        Exception: If ``GROQ_API_KEY`` is empty, or the endpoint answers with
	            anything other than HTTP 200.
	    """
	    if not GROQ_API_KEY:
	        raise Exception("GROQ_API_KEY non configuré")

	    request_body = dict(
	        model=model,
	        messages=messages,
	        max_tokens=max_tokens,
	        temperature=temperature,
	        top_p=top_p,
	    )
	    if tools:
	        request_body["tools"] = tools

	    reply = requests.post(
	        "https://api.groq.com/openai/v1/chat/completions",
	        headers={
	            "Authorization": f"Bearer {GROQ_API_KEY}",
	            "Content-Type": "application/json",
	        },
	        json=request_body,
	        timeout=120,
	    )
	    if reply.status_code == 200:
	        return reply.json()
	    # Only the first 200 characters of the error body are kept in the message.
	    raise Exception(f"Groq HTTP {reply.status_code}: {reply.text[:200]}")

	def search_web(query):
	    """Run a DuckDuckGo text search and return the hits as one string.

	    At most four results are requested. Each hit is rendered as a
	    title/summary/link block and the blocks are separated by a blank line.
	    Any failure (including a missing ``duckduckgo_search`` package) is
	    reported in the returned string instead of being raised.
	    """
	    try:
	        from duckduckgo_search import DDGS

	        hits = list(DDGS().text(query, max_results=4))
	        if len(hits) == 0:
	            return "Aucun résultat trouvé."

	        blocks = []
	        for hit in hits:
	            blocks.append(
	                f"Titre: {hit['title']}\nRésumé: {hit['body']}\nLien: {hit['href']}"
	            )
	        return "\n\n".join(blocks)
	    except Exception as exc:
	        return f"Erreur recherche: {str(exc)}"

	def _safe_log_dirname(username, fallback="anonymous"):
	    """Turn an arbitrary username into a single, traversal-free path segment."""
	    # Anything that is not alphanumeric, "-" or "_" (notably "/", "\\" and ".")
	    # is replaced so the value cannot escape the logs/ directory.
	    cleaned = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in str(username))
	    return cleaned or fallback


	def save_log(username, message, response):
	    """Upload one chat exchange as a JSON file to a private dataset repo.

	    The target repo is ``<token owner>/cypher-coder-logs``; it is created on
	    demand. Logging is best-effort: every failure is printed and swallowed so
	    it never interrupts the caller. Nothing happens when ``HF_TOKEN`` is unset.

	    Args:
	        username: Name stored in the log entry; a sanitized form of it is used
	            as the directory name inside the repo.
	        message: The user message to record.
	        response: The assistant response to record.

	    Returns:
	        None.
	    """
	    if not token:
	        return

	    # Local import: the file only imports `datetime` from the datetime module.
	    from datetime import timezone

	    try:
	        user = api.whoami()["name"]
	        repo_id = f"{user}/cypher-coder-logs"
	        try:
	            create_repo(repo_id, token=token, repo_type="dataset", private=True, exist_ok=True)
	        except Exception as repo_err:
	            # Keep going (the repo may already be usable) but leave a trace.
	            print(f"Erreur log (create_repo): {repo_err}")

	        # One naive-UTC timestamp shared by the entry and the file name; this
	        # yields the same format as the deprecated datetime.utcnow().
	        now = datetime.now(timezone.utc).replace(tzinfo=None)
	        log_entry = {
	            "username": username,
	            "timestamp": now.isoformat(),
	            "message": message,
	            "response": response,
	        }
	        # The username may come from the client, so it is sanitized before
	        # being used as a path component; a random suffix avoids collisions.
	        file_path = (
	            f"logs/{_safe_log_dirname(username)}/"
	            f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}.json"
	        )
	        api.upload_file(
	            path_or_fileobj=BytesIO(
	                json.dumps(log_entry, ensure_ascii=False, indent=2).encode("utf-8")
	            ),
	            path_in_repo=file_path,
	            repo_id=repo_id,
	            repo_type="dataset",
	            token=token,
	        )
	    except Exception as e:
	        print(f"Erreur log: {e}")

	@app.get("/")
	async def root(request: Request):
	    """Serve ``index.html`` from the working directory.

	    Returns the file content with status 200, or a 500 response whose body is
	    the error text when the file cannot be read.
	    """
	    try:
	        with open("index.html", "r", encoding="utf-8") as page:
	            markup = page.read()
	        return HTMLResponse(content=markup, status_code=200)
	    except Exception as exc:
	        return HTMLResponse(content=f"Error: {str(exc)}", status_code=500)

	@app.get("/api/user-profile")
	async def user_profile(request: Request):
	    """Return the caller's profile as read from the ``x-hf-user-*`` headers.

	    Missing headers fall back to "invité" for the username and to an empty
	    string for the avatar and email.
	    """
	    # (response key, request header, default when the header is absent)
	    fields = (
	        ("username", "x-hf-user-name", "invité"),
	        ("avatar", "x-hf-user-avatar", ""),
	        ("email", "x-hf-user-email", ""),
	    )
	    incoming = request.headers
	    return {key: incoming.get(header, default) for key, header, default in fields}

	# Tool definition appended to the client's tools on every chat request.
	# It is never mutated, so sharing one instance across requests is safe.
	_SEARCH_WEB_TOOL = {
	    "type": "function",
	    "function": {
	        "name": "search_web",
	        "description": "Recherche des informations actualisées ou de la documentation technique sur internet.",
	        "parameters": {
	            "type": "object",
	            "properties": {
	                "query": {"type": "string", "description": "La requête de recherche."}
	            },
	            "required": ["query"],
	        },
	    },
	}


	def _serialize_tool_calls(tool_calls):
	    """Keep only the id/type/function fields of each tool call."""
	    return [
	        {
	            "id": tc["id"],
	            "type": tc.get("type", "function"),
	            "function": {
	                "name": tc["function"]["name"],
	                "arguments": tc["function"]["arguments"],
	            },
	        }
	        for tc in tool_calls
	    ]


	def _last_user_content(messages):
	    """Return the content of the most recent user message, or "" if none."""
	    return next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")


	@app.post("/api/chat")
	async def chat(request: Request):
	    """Answer a chat request, resolving ``search_web`` tool calls server-side.

	    The JSON body may contain ``messages``, ``tools``, ``username``, ``model``,
	    ``temperature``, ``top_p`` and ``max_tokens``. A system prompt is inserted
	    when the client did not send one, and the ``search_web`` tool is always
	    offered to the model in addition to the client's tools.

	    Flow:
	      * The request goes to Groq when ``PROVIDER`` is "groq" and a Groq key is
	        set, otherwise to Hugging Face. A failing Hugging Face call falls back
	        to Groq when a key is available.
	      * If the provider rejects the request with a 422/"tools" error, the call
	        is retried once without tools.
	      * Tool calls that include ``search_web`` are executed here and the loop
	        continues; other tools in the same turn get a placeholder result.
	      * Tool calls without ``search_web`` are returned to the client untouched.

	    Returns:
	        ``{"message": {...}}`` on success, or ``{"error": "..."}`` with status
	        500 on any failure.
	    """
	    # The inference, search and logging helpers do blocking network I/O; they
	    # are run in the threadpool so they do not stall the event loop.
	    from fastapi.concurrency import run_in_threadpool

	    # Upper bound on consecutive server-side search rounds, so a model that
	    # keeps asking for search_web cannot loop forever.
	    max_search_rounds = 5

	    try:
	        body = await request.json()
	        # `or []` also covers an explicit JSON null.
	        messages = body.get("messages") or []
	        client_tools = body.get("tools") or []
	        username = body.get("username", "local-user")
	        requested_model = body.get("model", "")
	        temperature = body.get("temperature", 0.7)
	        top_p = body.get("top_p", 0.9)
	        max_tokens = body.get("max_tokens", 2048)

	        if not any(m.get("role") == "system" for m in messages):
	            messages.insert(0, {"role": "system", "content": SYSTEM_PROMPT})

	        all_tools = list(client_tools) + [_SEARCH_WEB_TOOL]

	        use_tools = True
	        search_rounds = 0

	        # Anything that is not a usable Groq setup is treated as Hugging Face,
	        # so an unexpected PROVIDER value still gets a default model and the
	        # Groq fallback below.
	        if PROVIDER == "groq" and GROQ_API_KEY:
	            current_provider = "groq"
	        else:
	            current_provider = "hf-inference"
	        current_model = requested_model
	        if not current_model:
	            defaults = FREE_MODELS_GROQ if current_provider == "groq" else FREE_MODELS_HF
	            current_model = defaults[0]

	        while True:
	            infer = infer_groq if current_provider == "groq" else infer_hf
	            try:
	                response = await run_in_threadpool(
	                    infer,
	                    messages,
	                    current_model,
	                    temperature,
	                    top_p,
	                    max_tokens,
	                    all_tools if use_tools else None,
	                )
	            except Exception as infer_err:
	                err_msg = str(infer_err)
	                lowered = err_msg.lower()

	                # Some models/providers reject the tools field: retry without it.
	                if use_tools and ("422" in err_msg or "tools" in lowered):
	                    use_tools = False
	                    continue

	                quota_error = (
	                    "rate" in lowered
	                    or "credit" in lowered
	                    or "402" in err_msg
	                    or "429" in err_msg
	                )
	                if current_provider == "groq":
	                    # Groq is the last resort; there is nothing to fall back to.
	                    if quota_error:
	                        raise Exception(f"Groq: {err_msg}") from infer_err
	                    raise Exception(err_msg) from infer_err

	                if GROQ_API_KEY:
	                    current_provider = "groq"
	                    current_model = FREE_MODELS_GROQ[0]
	                    continue
	                if quota_error:
	                    raise Exception(
	                        "Crédits HF épuisés et pas de fallback Groq configuré"
	                    ) from infer_err
	                raise Exception(err_msg) from infer_err

	            message = response["choices"][0]["message"]
	            role = message.get("role", "assistant")
	            content = message.get("content")
	            tool_calls = message.get("tool_calls")

	            # Plain answer: log it and return.
	            if not tool_calls:
	                await run_in_threadpool(
	                    save_log, username, _last_user_content(messages), content or ""
	                )
	                return JSONResponse(content={"message": {"role": role, "content": content}})

	            # Only client-side tools requested: hand them back for local execution.
	            if not any(tc["function"]["name"] == "search_web" for tc in tool_calls):
	                data = {
	                    "role": role,
	                    "content": content,
	                    "tool_calls": _serialize_tool_calls(tool_calls),
	                }
	                await run_in_threadpool(
	                    save_log,
	                    username,
	                    _last_user_content(messages),
	                    content or "[Outils locaux]",
	                )
	                return JSONResponse(content={"message": data})

	            # At least one search_web call: resolve it here and ask the model again.
	            search_rounds += 1
	            if search_rounds > max_search_rounds:
	                raise Exception("Trop d'appels consécutifs à search_web")

	            messages.append(
	                {"role": "assistant", "tool_calls": _serialize_tool_calls(tool_calls)}
	            )
	            for tc in tool_calls:
	                tool_name = tc["function"]["name"]
	                if tool_name == "search_web":
	                    try:
	                        args = json.loads(tc["function"]["arguments"])
	                        result = await run_in_threadpool(search_web, args.get("query", ""))
	                    except Exception as tool_err:
	                        result = f"Erreur: {str(tool_err)}"
	                else:
	                    # Local tools cannot run on the server; every tool call
	                    # still needs a matching tool message in the history.
	                    result = "En attente exécution locale..."
	                messages.append(
	                    {
	                        "role": "tool",
	                        "name": tool_name,
	                        "tool_call_id": tc["id"],
	                        "content": result,
	                    }
	                )

	            # Last allowed round: stop offering tools so the model must answer.
	            if search_rounds == max_search_rounds:
	                use_tools = False

	    except Exception as e:
	        return JSONResponse(content={"error": str(e)}, status_code=500)

	# Default system prompt (written in French). chat() inserts it as the first
	# message when the client did not send a system message. It tells the model
	# to act as "Cypher Coder", to call search_web before coding or answering,
	# and to stay concise with Markdown output. The text is sent verbatim to the
	# model, so edits here change runtime behavior.
	SYSTEM_PROMPT = """Tu es Cypher Coder, un agent de programmation IA. Tu as été créé par DJAKOUA KWANKAM.

	[RÈGLE : SEARCH-BEFORE-CODE]
	- Avant de générer du code ou répondre, cherche sur le web (outil search_web) pour obtenir des infos à jour.
	- Ne te base jamais uniquement sur ta mémoire interne.

	Tu as accès à des outils locaux (lire/écrire fichiers, exécuter commandes). Sois concis. Formate en Markdown."""

	# Look and feel of the Gradio page: Soft theme, footer hidden through CSS.
	theme = gr.themes.Soft(
	    primary_hue="indigo",
	    secondary_hue="cyan",
	    neutral_hue="slate",
	)
	css = "footer {visibility: hidden}"

	# Minimal Gradio UI: a single HTML banner.
	with gr.Blocks(css=css, theme=theme) as demo:
	    gr.HTML("<h1>💻 Cypher Coder</h1><p>Agent CLI autonome - IUT de Douala</p>")

	# Expose the Gradio UI under /gradio on the existing FastAPI application.
	app = gr.mount_gradio_app(app, demo, path="/gradio")

	if __name__ == "__main__":
	    import uvicorn

	    # Listen on all interfaces, port 7860.
	    uvicorn.run(app, host="0.0.0.0", port=7860)