Vivek16 committed
Commit 492454c (verified) · Parent(s): 42452c9

Update app.py

Files changed (1): app.py (+34 -6)
app.py CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import asyncio
 
+# -----------------------------
+# Async generator to stream responses
+# -----------------------------
 async def respond(
     message,
     history: list[dict[str, str]],
@@ -14,8 +17,10 @@ async def respond(
     """
     Async generator to stream responses from Hugging Face InferenceClient.
     """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
+    # Use your actual model
+    client = InferenceClient(token=hf_token.token, model="unsloth/qwen2.5-math-1.5b-bnb-4bit")
 
+    # Prepare messages
     messages = [{"role": "system", "content": system_message}]
     messages.extend(history)
     messages.append({"role": "user", "content": message})
@@ -23,7 +28,6 @@
     response = ""
 
     try:
-        # Use async for because chat_completion returns an async generator when streaming
         async for chunk in client.chat_completion(
             messages,
             max_tokens=max_tokens,
@@ -31,7 +35,6 @@
             temperature=temperature,
             top_p=top_p,
         ):
-            # Each chunk contains choices/delta like OpenAI streaming
             choices = getattr(chunk, "choices", None) or chunk.get("choices", [])
             token = ""
             if len(choices) and getattr(choices[0].delta, "content", None):
@@ -40,12 +43,37 @@
                 token = choices[0]["delta"]["content"]
 
             response += token
-            # yield partial response to Gradio (async generator yields updates)
             yield response
 
     except GeneratorExit:
-        # Happens when the client disconnects; just exit cleanly
         return
     except Exception as e:
-        # Send a final error message to the UI
         yield f"[Error streaming response] {str(e)}"
+
+
+# -----------------------------
+# Gradio Chat Interface
+# -----------------------------
+chatbot = gr.ChatInterface(
+    respond,
+    type="messages",
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
+)
+
+with gr.Blocks() as demo:
+    with gr.Sidebar():
+        gr.LoginButton()
+    chatbot.render()
+
+
+# -----------------------------
+# Launch Gradio app
+# -----------------------------
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+
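
A note on the streaming loop above: in huggingface_hub, the synchronous InferenceClient returns a plain iterator from chat_completion(..., stream=True), while the async for in respond() matches its async counterpart, AsyncInferenceClient. Below is a minimal standalone sketch of the same delta-accumulation pattern using the async client; the token string, prompt, and main() wrapper are placeholders for illustration, not part of this commit:

import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    # Placeholder token; in the Space it comes from the user's OAuth login.
    client = AsyncInferenceClient(
        token="hf_xxx",
        model="unsloth/qwen2.5-math-1.5b-bnb-4bit",
    )
    # With stream=True, the awaited call returns an async iterable of chunks.
    stream = await client.chat_completion(
        [{"role": "user", "content": "What is 2 + 2?"}],
        max_tokens=64,
        stream=True,
    )
    response = ""
    async for chunk in stream:
        # Each chunk carries an OpenAI-style choices[0].delta payload.
        token = chunk.choices[0].delta.content or ""
        response += token
        print(token, end="", flush=True)
    print()

if __name__ == "__main__":
    asyncio.run(main())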
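
The hunks elide respond()'s full parameter list, but the visible pieces imply its shape: gr.ChatInterface forwards each additional_inputs value, in order, after message and history, and Gradio fills any parameter annotated with gr.OAuthToken from the gr.LoginButton sign-in, which is presumably how hf_token.token becomes available inside respond(). A hedged reconstruction follows; the parameter order, types, and defaults are assumptions, not the committed file:

import gradio as gr

# Hypothetical signature sketch. The names all appear in the shown hunks or
# UI labels, but their order and annotations here are assumed.
async def respond(
    message: str,
    history: list[dict[str, str]],          # type="messages" history format
    system_message: str,                     # "System message" textbox
    max_tokens: int,                         # "Max new tokens" slider
    temperature: float,                      # "Temperature" slider
    top_p: float,                            # "Top-p (nucleus sampling)" slider
    hf_token: gr.OAuthToken | None = None,   # injected after gr.LoginButton sign-in
):
    ...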