import torch
import transformers
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
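# Base Mixtral checkpoint to fine-tune and the Hub repo name for the resulting model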
model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
new_model = "sedataskan/mistral8x7B-finetuned"
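# Load the tokenizer and the base model in fp16, sharded across available devices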
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Mixtral's tokenizer ships without a pad token; use a placeholder so padding works
tokenizer.pad_token = "!"
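# LoRA hyper-parameters: rank, scaling factor (alpha = 2 * r), and dropout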
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["w1", "w2", "w3"],  # only train the expert (MoE feed-forward) projections
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
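# Wrap the base model with LoRA adapters; only the adapter weights remain trainable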
model = get_peft_model(model, config)
def print_trainable_parameters(m):
    trainable_params = sum(p.numel() for p in m.parameters() if p.requires_grad)
    all_params = sum(p.numel() for p in m.parameters())
    print(f"trainable params: {trainable_params} || all params: {all_params} || trainable%: {100 * trainable_params / all_params:.2f}")

print_trainable_parameters(model)
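# NOTE: generate_prompt/tokenize below expect "instruction" and "output" columns;
# OSCAR-2201 is a raw-text corpus, so it must be adapted (or swapped for an
# instruction-style dataset) before the mapping step will run.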
train_data = load_dataset("oscar-corpus/OSCAR-2201", "tr", split="train")
print("Dataset", train_data)
# The prompt format follows the official Mixtral instruction template on Hugging Face
def generate_prompt(user_query):
    sys_msg = "Take a look at the following instructions and try to follow them."
    p = "<s> [INST]" + sys_msg + "\n" + user_query["instruction"] + "[/INST]" + user_query["output"] + "</s>"
    return p
max_len = 1024

def tokenize(prompt):
    # The prompt already ends with </s>, so no extra EOS token is appended here
    return tokenizer(
        prompt,
        truncation=True,
        max_length=max_len,
        padding="max_length",
    )
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)), remove_columns=["instruction", "output"])
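# The causal-LM collator (mlm=False) builds labels from input_ids and masks out padding,
# so no separate label column is needed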
trainer = Trainer(
    model=model,
    train_dataset=train_data,
    args=TrainingArguments(
        output_dir="outputs",  # checkpoint directory (name chosen here; adjust as needed)
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=6,
        learning_rate=1e-4,
        logging_steps=2,
        optim="adamw_torch",
        save_strategy="epoch",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
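# The KV cache is only useful at generation time; disable it during training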
model.config.use_cache = False

# Train model
trainer.train()
# Save trained model
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

# Push them to the HF Hub (a valid Hugging Face access token must be supplied)
trainer.model.push_to_hub(new_model, use_temp_dir=False, token="")
tokenizer.push_to_hub(new_model, use_temp_dir=False, token="")
# # Format prompt
# message = [
#     "Türkiye'nin başkenti neresidir?"  # "What is the capital of Turkey?"
# ]
# tokenizer = AutoTokenizer.from_pretrained(new_model)
# prompt = tokenizer(message, return_tensors="pt", padding=True).to(model.device)  # move inputs to the model's device
# # Generate output
# output = trainer.model.generate(
#     input_ids=prompt.input_ids,
#     attention_mask=prompt.attention_mask,
#     max_length=128,
#     do_sample=True,
#     top_p=0.95,
#     top_k=60,
#     num_return_sequences=1,
# )
# # Print output
# print(tokenizer.batch_decode(output, skip_special_tokens=True))