"""Gradio chat demo that serves the Qwen2.5-Math 1.5B model on CPU."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_ID = "unsloth/qwen2.5-math-1.5b"

# Maps the roles of Gradio's "messages" history format onto the speaker
# labels used in the plain-text prompt fed to the model.
_ROLE_LABELS = {"user": "User", "assistant": "Bot"}

# Marker the prompt uses for a user turn; a base model may keep writing past
# its own reply and start a new one, so the reply is cut there.
_NEXT_TURN_MARKER = "\nUser:"

# Load model and tokenizer (CPU only).
# NOTE(review): `device_map="cpu"` was dropped on purpose. It makes the model
# accelerate-dispatched, and some transformers releases refuse (or warn about)
# an explicit pipeline `device` for such models. `device=-1` below is enough
# to keep inference on the CPU.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # CPU
)


def _build_prompt(message, history, system_message):
    """Render the system message, past turns and new message as one prompt."""
    lines = [system_message]
    for turn in history or []:
        if not isinstance(turn, dict):
            continue
        label = _ROLE_LABELS.get(turn.get("role"))
        content = turn.get("content")
        # Skip roles the prompt format has no label for, and non-text
        # content (e.g. file attachments) that cannot be inlined as text.
        if label is None or not isinstance(content, str):
            continue
        lines.append(f"{label}: {content}")
    lines.append(f"User: {message}")
    lines.append("Bot:")
    return "\n".join(lines)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the bot's reply to ``message``.

    Args:
        message: The text the user just submitted.
        history: Previous turns as supplied by ``gr.ChatInterface`` with
            ``type="messages"``: a list of dicts with ``role`` and
            ``content`` keys.
        system_message: Instruction text placed at the top of the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The reply text only. ``gr.ChatInterface`` appends it to the
        conversation itself, so the history is neither mutated nor returned.
    """
    prompt = _build_prompt(message, history, system_message)
    outputs = generator(
        prompt,
        max_new_tokens=int(max_tokens),  # sliders may deliver floats
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        return_full_text=False,  # only the continuation, not the prompt
    )
    reply = outputs[0]["generated_text"]
    # Keep just the bot's own turn if the model started a follow-up one.
    reply = reply.split(_NEXT_TURN_MARKER, 1)[0]
    return reply.strip()


# Chat interface
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=512, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()

if __name__ == "__main__":
    demo.launch()