bezzam HF Staff committed on
Commit
3141ede
·
verified ·
1 Parent(s): 0de3b6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -18
app.py CHANGED
@@ -1,10 +1,6 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
- # Initialize the client for a HF-hosted model
5
- # No token needed when running inside a Space owned by a Team org
6
- client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct")
7
-
8
 
9
  def respond(
10
  message,
@@ -13,32 +9,40 @@ def respond(
13
  max_tokens,
14
  temperature,
15
  top_p,
 
16
  ):
17
  """
18
- Generate responses using HF-hosted Llama 3.1 model.
19
- This version avoids Novita/Groq routing and does not require tokens.
20
  """
 
 
21
  messages = [{"role": "system", "content": system_message}]
 
22
  messages.extend(history)
 
23
  messages.append({"role": "user", "content": message})
24
 
25
  response = ""
26
- # Stream responses using the new chat.completions API
27
- for message_chunk in client.chat.completions.create(
28
- messages=messages,
29
  max_tokens=max_tokens,
 
30
  temperature=temperature,
31
  top_p=top_p,
32
- stream=True
33
  ):
34
- delta = message_chunk.choices[0].delta
35
- if delta and delta.content:
36
- token = delta.content
37
- response += token
38
- yield response
 
 
39
 
40
 
41
- # Define the Gradio Chat Interface
 
 
42
  chatbot = gr.ChatInterface(
43
  respond,
44
  type="messages",
@@ -56,11 +60,11 @@ chatbot = gr.ChatInterface(
56
  ],
57
  )
58
 
59
- # Build the Gradio Blocks interface with optional login button
60
  with gr.Blocks() as demo:
61
  with gr.Sidebar():
62
  gr.LoginButton()
63
  chatbot.render()
64
 
 
65
  if __name__ == "__main__":
66
- demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
 
 
 
 
4
 
5
  def respond(
6
  message,
 
9
  max_tokens,
10
  temperature,
11
  top_p,
12
+ hf_token: gr.OAuthToken,
13
  ):
14
  """
15
+ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
16
  """
17
+ client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
+
19
  messages = [{"role": "system", "content": system_message}]
20
+
21
  messages.extend(history)
22
+
23
  messages.append({"role": "user", "content": message})
24
 
25
  response = ""
26
+
27
+ for message in client.chat_completion(
28
+ messages,
29
  max_tokens=max_tokens,
30
+ stream=True,
31
  temperature=temperature,
32
  top_p=top_p,
 
33
  ):
34
+ choices = message.choices
35
+ token = ""
36
+ if len(choices) and choices[0].delta.content:
37
+ token = choices[0].delta.content
38
+
39
+ response += token
40
+ yield response
41
 
42
 
43
+ """
44
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
+ """
46
  chatbot = gr.ChatInterface(
47
  respond,
48
  type="messages",
 
60
  ],
61
  )
62
 
 
63
  with gr.Blocks() as demo:
64
  with gr.Sidebar():
65
  gr.LoginButton()
66
  chatbot.render()
67
 
68
+
69
  if __name__ == "__main__":
70
+ demo.launch()