RodriiS committed on
Commit fb133d5 · verified · 1 Parent(s): 01aaf5d

Update app.py

Files changed (1)
  1. app.py +36 -18
app.py CHANGED
@@ -1,18 +1,31 @@
  import os
  from dotenv import load_dotenv
  import gradio as gr
- from huggingface_hub import InferenceClient
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

- # Load environment variables (for local testing; on the Space, secrets are used)
+ # Load environment variables
  load_dotenv()
  api_key = os.getenv("HF_API_TOKEN")
-
- # Check that the token exists
  if not api_key:
-     raise ValueError("Error: HF_API_TOKEN not found. Set it in the Space secrets (Settings > Repository secrets).")
+     raise ValueError("Error: HF_API_TOKEN not found. Set it in the Space secrets.")
+
+ # Quantization configuration
+ quant_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype="bfloat16"
+ )

- # Initialize the inference client with the Mistral model
- client = InferenceClient(token=api_key, model="mistralai/Mistral-7B-Instruct-v0.3")
+ # Initialize the model and tokenizer
+ model_id = "Qwen/Qwen2-7B-Instruct-GPTQ"
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=api_key)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     quantization_config=quant_config,
+     device_map="auto",
+     use_auth_token=api_key
+ )

  # Chatbot function
  def chat_with_llm(message, history):
@@ -25,22 +38,27 @@ def chat_with_llm(message, history):
              messages.append({"role": "assistant", "content": bot_msg})
          messages.append({"role": "user", "content": message})

-         # Call the Mistral API
-         response = client.chat.completions.create(
-             model="mistralai/Mistral-7B-Instruct-v0.3",
-             messages=messages,
-             max_tokens=150,
-             temperature=0.7,
+         # Tokenize the input
+         inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+
+         # Generate the response
+         outputs = model.generate(
+             inputs,
+             max_new_tokens=500,
+             temperature=0.5,
+             top_p=0.9,
+             do_sample=True
          )
-         return response.choices[0].message.content
+         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return response
      except Exception as e:
-         return f"Error: {str(e)}. Check the API token, internet connection, or free-tier API limits."
+         return f"Error: {str(e)}. Check the API token, internet connection, or hardware requirements."

- # Create the Gradio interface
+ # Gradio interface
  demo = gr.ChatInterface(
      fn=chat_with_llm,
-     title="Chatbot with Mistral (Free via HF API)",
-     description="Try the chatbot using Mistral. Free tier with limits.",
+     title="Chatbot with Quantized Qwen2-7B",
+     description="A chatbot using Qwen2-7B quantized to 4-bit for greater efficiency.",
  )

  if __name__ == "__main__":
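
Two caveats about the new loading code, neither of which is part of the commit itself. First, Qwen/Qwen2-7B-Instruct-GPTQ names a GPTQ-quantized checkpoint that ships its own quantization settings, so stacking a bitsandbytes BitsAndBytesConfig on top of it conflicts (recent transformers releases reject applying a second quantization method to an already-quantized model). Second, use_auth_token is deprecated in favor of token. If the intent is NF4 bitsandbytes quantization, the unquantized base checkpoint is the usual target. A minimal sketch under those assumptions; the model id Qwen/Qwen2-7B-Instruct is a substitution, not the committed value:

    import os
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    api_key = os.getenv("HF_API_TOKEN")

    # NF4 4-bit quantization, as in the commit, but aimed at an unquantized checkpoint
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model_id = "Qwen/Qwen2-7B-Instruct"  # assumed base repo, not the GPTQ one
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=api_key)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quant_config,  # only meaningful for an unquantized model
        device_map="auto",
        token=api_key,  # replaces the deprecated use_auth_token
    )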
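
On the generation path, apply_chat_template is called without add_generation_prompt=True, and tokenizer.decode(outputs[0], ...) decodes the entire sequence, so the returned string contains the serialized conversation rather than just the model's reply; the hard-coded .to("cuda") also fails on a CPU-only Space. A sketch of the same steps inside chat_with_llm with those issues addressed, keeping the committed variable names and sampling parameters:

    # Build the prompt and close it with the assistant turn header
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)  # follow the model's device instead of hard-coding "cuda"

    outputs = model.generate(
        inputs,
        max_new_tokens=500,
        temperature=0.5,
        top_p=0.9,
        do_sample=True,
    )

    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)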