import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Fine-tuned adapter repo on the Hugging Face Hub; replace with your own
# repository name if you trained your own adapter
REPO_NAME = "shanaka95/autotrain-01-24"

# Load the tokenizer from the adapter repo
tokenizer = AutoTokenizer.from_pretrained(REPO_NAME)

# Load the base model (must be the same model the adapter was trained on)
BASE_MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_NAME, torch_dtype=torch.float16)

# Attach the LoRA adapter weights to the base model
adapter_model = PeftModel.from_pretrained(base_model, REPO_NAME)

# Switch to evaluation mode for inference (to merge the adapter into the base
# weights permanently, call adapter_model.merge_and_unload() instead)
adapter_model.eval()

# Create a pipeline for text generation
text_generation = pipeline(
    "text-generation",
    model=adapter_model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # use GPU if available
)

SYSTEM_PROMPT = (
    "You are a highly knowledgeable crypto market analysis expert specializing in BTC/USDT. "
    "Using your expertise in both fundamental and technical analysis, evaluate the provided "
    "market price data and market news for BTC/USDT from today and the previous four days. "
    "After thorough analysis, assist in determining whether tomorrow's BTC/USDT price is "
    "likely to increase or decrease compared to today. If the price is predicted to increase, "
    "estimate an average maximum price; if the price is predicted to decrease, estimate an "
    "average minimum price."
)


def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
):
    # Note: the conversation history is currently ignored; each prediction is
    # made from the latest message alone.
    input_messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": message},
    ]
    generated_texts = text_generation(
        input_messages,
        max_new_tokens=max_tokens,
        num_return_sequences=1,
        temperature=temperature,
        top_p=top_p,
    )
    # The pipeline returns the full chat (system, user, assistant); index 2 is
    # the newly generated assistant message.
    return generated_texts[0]["generated_text"][2]["content"]


"""
For information on how to customize the ChatInterface, peruse the gradio docs:
https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=128, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.4, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()
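
# --- Example usage ---------------------------------------------------------
# A minimal sketch of exercising respond() directly, without the Gradio UI.
# The message below is a hypothetical placeholder, not real market data;
# substitute your own price/news text. Uncomment to run a quick smoke test:
#
# sample_message = "Price data (last 5 days): <OHLC values>. News: <headlines>."
# reply = respond(sample_message, history=[], max_tokens=128, temperature=0.4, top_p=0.95)
# print(reply)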