Vivek16 committed
Commit 492454c (verified) · Parent(s): 42452c9

Update app.py

Files changed (1): app.py (+34 -6)
app.py CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import asyncio
 
+# -----------------------------
+# Async generator to stream responses
+# -----------------------------
 async def respond(
     message,
     history: list[dict[str, str]],
@@ -14,8 +17,10 @@ async def respond(
     """
     Async generator to stream responses from Hugging Face InferenceClient.
     """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
+    # Use your actual model
+    client = InferenceClient(token=hf_token.token, model="unsloth/qwen2.5-math-1.5b-bnb-4bit")
 
+    # Prepare messages
     messages = [{"role": "system", "content": system_message}]
     messages.extend(history)
     messages.append({"role": "user", "content": message})
@@ -23,7 +28,6 @@
     response = ""
 
     try:
-        # Use async for because chat_completion returns an async generator when streaming
         async for chunk in client.chat_completion(
             messages,
             max_tokens=max_tokens,
@@ -31,7 +35,6 @@
             temperature=temperature,
             top_p=top_p,
         ):
-            # Each chunk contains choices/delta like OpenAI streaming
             choices = getattr(chunk, "choices", None) or chunk.get("choices", [])
             token = ""
             if len(choices) and getattr(choices[0].delta, "content", None):
@@ -40,12 +43,37 @@
                 token = choices[0]["delta"]["content"]
 
             response += token
-            # yield partial response to Gradio (async generator yields updates)
             yield response
 
     except GeneratorExit:
-        # Happens when the client disconnects; just exit cleanly
         return
     except Exception as e:
-        # Send a final error message to the UI
         yield f"[Error streaming response] {str(e)}"
+
+
+# -----------------------------
+# Gradio Chat Interface
+# -----------------------------
+chatbot = gr.ChatInterface(
+    respond,
+    type="messages",
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
+)
+
+with gr.Blocks() as demo:
+    with gr.Sidebar():
+        gr.LoginButton()
+    chatbot.render()
+
+
+# -----------------------------
+# Launch Gradio app
+# -----------------------------
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+
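
A note on the streaming loop above: in huggingface_hub, the synchronous InferenceClient returns a plain iterator from chat_completion(..., stream=True), while the async for in respond() matches its async counterpart, AsyncInferenceClient. Below is a minimal standalone sketch of the same delta-accumulation pattern using the async client; the token string, prompt, and main() wrapper are placeholders for illustration, not part of this commit:

import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    # Placeholder token; in the Space it comes from the user's OAuth login.
    client = AsyncInferenceClient(
        token="hf_xxx",
        model="unsloth/qwen2.5-math-1.5b-bnb-4bit",
    )
    # With stream=True, the awaited call returns an async iterable of chunks.
    stream = await client.chat_completion(
        [{"role": "user", "content": "What is 2 + 2?"}],
        max_tokens=64,
        stream=True,
    )
    response = ""
    async for chunk in stream:
        # Each chunk carries an OpenAI-style choices[0].delta payload.
        token = chunk.choices[0].delta.content or ""
        response += token
        print(token, end="", flush=True)
    print()

if __name__ == "__main__":
    asyncio.run(main())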
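
The hunks elide respond()'s full parameter list, but the visible pieces imply its shape: gr.ChatInterface forwards each additional_inputs value, in order, after message and history, and Gradio fills any parameter annotated with gr.OAuthToken from the gr.LoginButton sign-in, which is presumably how hf_token.token becomes available inside respond(). A hedged reconstruction follows; the parameter order, types, and defaults are assumptions, not the committed file:

import gradio as gr

# Hypothetical signature sketch. The names all appear in the shown hunks or
# UI labels, but their order and annotations here are assumed.
async def respond(
    message: str,
    history: list[dict[str, str]],          # type="messages" history format
    system_message: str,                     # "System message" textbox
    max_tokens: int,                         # "Max new tokens" slider
    temperature: float,                      # "Temperature" slider
    top_p: float,                            # "Top-p (nucleus sampling)" slider
    hf_token: gr.OAuthToken | None = None,   # injected after gr.LoginButton sign-in
):
    ...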