Update app
Browse files- app.py +1768 -0
- create_retriever.py +98 -0
- data/demo_table_data.xlsx +0 -0
- data/demo_test_data.xlsx +0 -0
- data/emb_data.xlsx +0 -0
- data/existing_dataset.xlsx +0 -0
- data/finetune_data.xlsx +0 -0
- data/testing_dataset.xlsx +0 -0
- data/validation-kuetllm_tanim - Copy.xlsx +0 -0
- data_ret.py +6 -0
- deploy/create_retriever.py +114 -0
- deploy/inference.py +119 -0
- deploy/main.py +75 -0
- deploy/model_ret.py +114 -0
- deploy/params.txt +1 -0
- embedding_tuner.py +202 -0
- fine_tune_file/finetune_file.py +138 -0
- fine_tune_file/flant5_finetune.py +97 -0
- fine_tune_file/llama_finetune.py +136 -0
- fine_tune_file/mistral_finetune.py +138 -0
- fine_tune_file/modular_finetune.py +177 -0
- fine_tune_file/phi_finetune.py +135 -0
- fine_tune_file/zepyhr_finetune.py +136 -0
- inference.py +81 -0
- model_ret.py +92 -0
- model_ret.py.old +179 -0
- models/.gitignore +3 -0
- params.txt +1 -0
- requirements.txt +29 -0
- score_report/2024_03_25_21_10_54model_ans_mistral_finetuned_486_colbert.xlsx +0 -0
- score_report/2024_03_25_22_09_44model_ans_zepyhr_finetuned_486_colbert.xlsx +0 -0
- score_report/2024_03_26_22_42_56model_ans_llama_finetuned_486_rag_colbert.xlsx +0 -0
- test1.txt +0 -0
- testing.ipynb +712 -0
- utils.py +165 -0
app.py
ADDED
|
@@ -0,0 +1,1768 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import pandas as pd
import os
from pathlib import Path
import random
from utils import display_table,current_time,random_ques_ans2,move_to,score_report_bar,all_contri_ans
from inference import model_chain
import warnings
from huggingface_hub import snapshot_download

# Pull the reference RAG dataset from the HF dataset repo into ./rag_data.
# Requires the HF_TOKEN environment variable (e.g. a Space secret) to be set.
snapshot_download(repo_id="CGIAR/weai-ref",
                  repo_type="dataset",
                  token=os.getenv('HF_TOKEN'),
                  local_dir='./rag_data'
                  )
warnings.filterwarnings('ignore')
os.environ["WANDB_DISABLED"] = "true"
# NOTE(review): `global` at module level is a no-op; `cnt` is simply a module
# global used as a row cursor by random_ques_ans() below.
global cnt
cnt=1
data=[]                  # accumulated {question, answer, rating} rows for the score report
save_ques_ans=[]
save_ques_ans_test=[]
cur_time=current_time()  # timestamp suffix used in score-report filenames

from huggingface_hub import HfApi
# Initialize the Hugging Face API client
api = HfApi()

# Specify the organization where Llama models are hosted
organization = "meta-llama"

# List all models belonging to the specified organization
# This will return a list of ModelInfo objects
llama_models = [model.modelId for model in api.list_models(author=organization)
                if 'chat' in model.modelId]
|
| 37 |
+
def random_ques_ans(model_ans):
    """Return the next (question, answer, id, flag) row from a model-answer sheet.

    Args:
        model_ans: File name of an .xlsx sheet inside the ``model_ans`` folder,
            expected to contain ``id``, ``question`` and ``answer`` columns.

    Returns:
        tuple: (question, answer, id, flag) where flag is 0 when the cursor has
        just wrapped around to the start of the sheet, otherwise 1.
    """
    df_temp=pd.read_excel(os.path.join("model_ans",str(model_ans)))
    global cnt
    # NOTE(review): `id` shadows the builtin here; the module-level cursor `cnt`
    # starts at 1, so row 0 is only served after the first wrap-around.
    id=int((df_temp.loc[cnt])['id'])
    ques_temp=(df_temp.loc[cnt])['question']
    ans_temp=(df_temp.loc[cnt])['answer']
    cnt+=1
    if cnt>=len(df_temp):
        cnt=0
        return ques_temp,ans_temp,id,0
    return ques_temp,ans_temp,id,1
| 48 |
+
def save_all(model_ans):
    """Persist every collected rating row to an Excel score report.

    Args:
        model_ans: Name of the model-answer file being rated; combined with the
            app start time to build the report file name.
    """
    temp=pd.DataFrame(data)
    # Bug fix: the original f"score_report\\{...}" path contains a literal
    # backslash, so on Linux it created a file named "score_report\..." in the
    # working directory instead of a file inside the score_report folder.
    os.makedirs("score_report", exist_ok=True)
    temp.to_excel(os.path.join("score_report", f"{model_ans+cur_time}.xlsx"), index=False)
    gr.Info("Sucessfully save all the answer!!!")
|
| 52 |
+
|
| 53 |
+
def score_save(ques,ans,score,model_ans,token_key):
    """Record one rating, write the running score report, and serve the next Q/A.

    Args:
        ques: Question that was rated.
        ans: Model answer that was rated.
        score: Rating given by the reviewer.
        model_ans: Model-answer file name (used for the report name and to
            fetch the next question).
        token_key: Unused here; kept for interface compatibility with the UI wiring.

    Returns:
        tuple: Updated (ID, Question, Answer) gradio Labels for the next item.
    """
    data.append({
        "question":ques,
        'answer':ans,
        'rating':score
    })
    temp=pd.DataFrame(data)
    # Bug fix: use os.path.join instead of a literal backslash so the report
    # lands inside the score_report folder on every OS.
    os.makedirs("score_report", exist_ok=True)
    temp.to_excel(os.path.join("score_report", f"{model_ans+cur_time}.xlsx"), index=False)
    gr.Info("Sucessfully saved in local folder!!!")
    ques_temp,ans_temp,id,flag=random_ques_ans(model_ans)
    gr.Info("Your opinion is submitted successfully!!!")
    return gr.Label(value=id,label="ID"),gr.Label(value=ques_temp, label="Question"), gr.Label(value=ans_temp, label="Answer")
|
| 66 |
+
|
| 67 |
+
def new_ques(model_ans):
    """Fetch a fresh question/answer pair and push it into the UI labels.

    Returns a gradio update-dict keyed by output components.
    NOTE(review): the keys `ques` and `ans` resolve at call time to components
    created in the Blocks context below; `id` appears to rely on a component of
    that name existing at module level (otherwise it is the builtin `id`
    function) — verify against the UI wiring.
    """
    ques_temp,ans_temp,id2,flag=random_ques_ans(model_ans)
    return {
        id:gr.Label(value=id2,label="ID"),
        ques:gr.Label(value=ques_temp,label="Question"),
        ans:gr.Label(value=ans_temp,label="Answer")
    }
|
| 74 |
+
|
| 75 |
+
def save_the_ques(ques,ans,file_type = 'xlsx'):
    """
    Saves a question and answer pair to data/finetune_data.(xlsx|csv),
    appending to the file if it already exists.

    Args:
        ques (str): The question.
        ans (str): The answer.
        file_type (str, optional): The file type to save to ("xlsx" or "csv").
            Values coming from the UI dropdown ("Save xlsx" / "Save csv") are
            normalized automatically. Defaults to "xlsx".

    Returns:
        gr.Label: A visible success label.
    """
    # Bug fix: the dropdown sends "Save xlsx"/"Save csv", which previously
    # built paths like "finetune_data.Save xlsx" and matched neither writer
    # branch, so nothing was ever saved from the UI. Normalize first.
    file_type = file_type.lower().replace("save", "").strip()
    if file_type not in ("xlsx", "csv"):
        file_type = "xlsx"

    new_data = {"question": [ques], "answer": [ans]}
    df_new = pd.DataFrame(new_data)

    filepath = f"data/finetune_data.{file_type}"

    if Path(filepath).is_file():
        df_existing = pd.read_excel(filepath) if file_type == "xlsx" else pd.read_csv(filepath)
        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    else:
        df_combined = df_new

    if file_type == "xlsx":
        df_combined.to_excel(filepath, index=False)
    else:
        df_combined.to_csv(filepath, index=False)

    return gr.Label(value="Successfully saved in local folder.", visible=True)
|
| 106 |
+
|
| 107 |
+
def save_the_ques_test(ques, ans, file_type = 'xlsx'):
    """
    Saves a question and ground-truth pair to data/testing_data.(xlsx|csv),
    appending to the file if it already exists.

    Args:
        ques (str): The question.
        ans (str): The answer (ground truth).
        file_type (str, optional): The file type to save to ("xlsx" or "csv").
            Values coming from the UI dropdown ("Save xlsx" / "Save csv") are
            normalized automatically. Defaults to "xlsx".

    Returns:
        gr.Label: A visible success label.
    """
    # Bug fix: the dropdown sends "Save xlsx"/"Save csv", which previously
    # built paths like "testing_data.Save xlsx" and matched neither writer
    # branch, so nothing was ever saved from the UI. Normalize first.
    file_type = file_type.lower().replace("save", "").strip()
    if file_type not in ("xlsx", "csv"):
        file_type = "xlsx"

    new_data = {"question": [ques], "answer": [ans]}
    df_new = pd.DataFrame(new_data)

    filepath = f"data/testing_data.{file_type}"

    if Path(filepath).is_file():
        df_existing = pd.read_excel(filepath) if file_type == "xlsx" else pd.read_csv(filepath)
        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    else:
        df_combined = df_new

    if file_type == "xlsx":
        df_combined.to_excel(filepath, index=False)
    else:
        df_combined.to_csv(filepath, index=False)

    return gr.Label(value="Successfully saved in local folder.", visible=True)
|
| 138 |
+
|
| 139 |
+
import pandas as pd
|
| 140 |
+
from pathlib import Path
|
| 141 |
+
|
| 142 |
+
def save_emb_data(loss_function, first_input, second_input, third_input, file_type="xlsx"):
    """
    Saves one embedding-training row, formatted for the selected loss function,
    to data/emb_data.(xlsx|csv). Appends when the existing file has matching
    columns; otherwise the file is created or overwritten.

    Args:
        loss_function (str): The name of the loss function.
        first_input: The first input value (anchor / sentence1).
        second_input: The second input value (positive / sentence2).
        third_input: The third input value (negative / label / score).
        file_type (str, optional): "xlsx" or "csv"; UI dropdown values
            ("Save xlsx" / "Save csv") are normalized automatically.

    Returns:
        str: A status message describing what happened.
    """
    # Column layout expected by each supported loss function. Replaces five
    # near-identical if/elif branches.
    columns_by_loss = {
        "MultipleNegativesRankingLoss": ("anchor", "positive", "negative"),
        "OnlineContrastiveLoss": ("sentence1", "sentence2", "label"),
        "CoSENTLoss": ("sentence1", "sentence2", "score"),
        "GISTEmbedLoss": ("anchor", "positive", "negative"),
        "TripletLoss": ("anchor", "positive", "negative"),
    }
    # Bug fix: an unrecognized loss function previously raised NameError
    # because `data` was never bound; report it instead.
    if loss_function not in columns_by_loss:
        return "Unknown loss function selected; nothing was saved."
    c1, c2, c3 = columns_by_loss[loss_function]
    data = pd.DataFrame({c1: [first_input], c2: [second_input], c3: [third_input]})

    # Bug fix: the UI dropdown sends "Save xlsx"/"Save csv"; normalize so the
    # extension and the writer branches actually match.
    file_type = file_type.lower().replace("save", "").strip()
    if file_type not in ("xlsx", "csv"):
        file_type = "xlsx"

    filepath = f"data/emb_data.{file_type}"

    try:
        if file_type == "xlsx":
            existing_data = pd.read_excel(filepath)
        else:
            existing_data = pd.read_csv(filepath)

        if list(data.columns) == list(existing_data.columns):
            combined_data = pd.concat([existing_data, data], ignore_index=True)
            if file_type == "xlsx":
                combined_data.to_excel(filepath, index=False)
            else:
                combined_data.to_csv(filepath, index=False)
            return "Data appended to existing file!"
        else:
            # Columns differ (loss function changed): start the file over.
            if file_type == "xlsx":
                data.to_excel(filepath, index=False)
            else:
                data.to_csv(filepath, index=False)
            return "Data saved to a new file (overwritten)!"

    except FileNotFoundError:
        if file_type == "xlsx":
            data.to_excel(filepath, index=False)
        else:
            data.to_csv(filepath, index=False)
        return "Data saved to a new file!"
|
| 219 |
+
|
| 220 |
+
def parse_data_func(link_temp,progress=gr.Progress()):
    """Parse the given URL into RAG source data, reporting progress in the UI.

    Args:
        link_temp (str): Website URL to scrape.
        progress: Gradio progress tracker (injected by gradio at call time).

    NOTE(review): relies on `parse_data`, imported from utils further down in
    the UI-wiring section, so it must only be called after the Blocks are built.
    """
    progress(0, desc="Starting...")
    parse_data(link_temp,progress)
    gr.Info("Finished parsing!! Save as a docx file.")
|
| 224 |
+
|
| 225 |
+
def next_ques(ques, ans):
    """Serve a fresh question label; the current question/answer inputs are ignored."""
    question_text, _answer_text = random_ques_ans2()
    return gr.Label(value=question_text)
|
| 228 |
+
|
| 229 |
+
with gr.Blocks(title="LLM QA Chatbot Builder",theme=gr.themes.Soft()) as demo:
|
| 230 |
+
gr.Markdown("""
|
| 231 |
+
# LLM QA Chatbot Builder
|
| 232 |
+
""")
|
| 233 |
+
with gr.Tab("Data Collection"):
|
| 234 |
+
gr.Markdown(""" # Instructions:
|
| 235 |
+
In this page you can prepare data for LLM fine-tuning, testing and embedding model finetuning your model. The data can be provided through Excel file or CSV file or directly via web interface. Additionally, data can be parsed from the target website (Data parsing for RAG) to further enhance the model performance.
|
| 236 |
+
|
| 237 |
+
## 1. If you want to provide data in Excel file or CSV file for model fine-tuning and testing.
|
| 238 |
+
- Create an Excel or CSV file in the data folder and name it `finetune_data.xlsx` or `finetune_data.csv` for finetuning the model.
|
| 239 |
+
- Create an Excel or CSV file in the data folder and name it `testing_data.xlsx` or `testing_data.csv` for generating answers using the fine-tuned model.
|
| 240 |
+
- `finetune_data.xlsx` or `finetune_data.csv` has two columns: `question` and `answer`. `testing_data.xlsx` or `testing_data.csv` has three columns: `question`, `ground_truth` ,`context`.
|
| 241 |
+
""")
|
| 242 |
+
gr.Markdown("""
|
| 243 |
+
## `finetune_data.xlsx` | `finetune_data.csv`
|
| 244 |
+
""")
|
| 245 |
+
gr.HTML(value=display_table(), label="finetune_data.xlsx or finetune_data.csv")
|
| 246 |
+
gr.Markdown("""
|
| 247 |
+
## `testing_data.xlsx` | `testing_data.csv`
|
| 248 |
+
""")
|
| 249 |
+
gr.HTML(value=display_table("data/demo_test_data.xlsx"), label="testing_data.xlsx or testing_data.csv")
|
| 250 |
+
gr.Markdown("""
|
| 251 |
+
## 2. You can use the below interface to create the dataset for training and testing models.
|
| 252 |
+
""")
|
| 253 |
+
|
| 254 |
+
#Training data generation
|
| 255 |
+
with gr.Tab("Training Data Generation"):
|
| 256 |
+
with gr.Tab("Existing Questions"):
|
| 257 |
+
gr.Markdown("""
|
| 258 |
+
Existing questions are provided by the administrator and placed in the data folder named `existing_dataset.xlsx`. This file has only one column: `question`.
|
| 259 |
+
After clicking the `Save the Answer` button. Those questions and answers are saved in the `data` folder as a `finetune_data.xlsx` file.
|
| 260 |
+
""")
|
| 261 |
+
ques_temp,ans_temp=random_ques_ans2()
|
| 262 |
+
with gr.Row():
|
| 263 |
+
ques=gr.Label(value=ques_temp,label="Question")
|
| 264 |
+
with gr.Row():
|
| 265 |
+
ans=gr.TextArea(label="Answer")
|
| 266 |
+
with gr.Row():
|
| 267 |
+
with gr.Row():
|
| 268 |
+
type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
|
| 269 |
+
save_training = gr.Button(value="Save")
|
| 270 |
+
question = gr.Button("Generate New Question")
|
| 271 |
+
with gr.Row():
|
| 272 |
+
lab=gr.Label(visible=False)
|
| 273 |
+
question.click(next_ques,None,ques)
|
| 274 |
+
save_training.click(save_the_ques,[ques,ans,type_options],lab)
|
| 275 |
+
|
| 276 |
+
with gr.Tab("Custom Questions"):
|
| 277 |
+
gr.Markdown("""
|
| 278 |
+
After clicking the `save the answer` button. Those questions and answers are saved in the `data` folder as a `finetune_data.xlsx` file.
|
| 279 |
+
""")
|
| 280 |
+
with gr.Row():
|
| 281 |
+
ques=gr.Textbox(label="Question")
|
| 282 |
+
with gr.Row():
|
| 283 |
+
ans=gr.TextArea(label="Answer")
|
| 284 |
+
with gr.Row():
|
| 285 |
+
with gr.Row():
|
| 286 |
+
type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
|
| 287 |
+
save_training = gr.Button(value="Save")
|
| 288 |
+
with gr.Row():
|
| 289 |
+
lab=gr.Label(visible=False,value="You answer is submitted!!! Thank you for your contribution.",label="Submitted")
|
| 290 |
+
save_training.click(save_the_ques,[ques,ans,type_options],lab)
|
| 291 |
+
|
| 292 |
+
### Testing data generation
|
| 293 |
+
with gr.Tab("Testing Data Generation"):
|
| 294 |
+
gr.Markdown("""
|
| 295 |
+
You can create test data for generating answers using the fine-tune model, which will be used for testing the model's performance.
|
| 296 |
+
After clicking the `Save the Answer` button. Those questions and answers are saved in the `data` folder as a `testing_data.xlsx` file.
|
| 297 |
+
""")
|
| 298 |
+
with gr.Row():
|
| 299 |
+
ques=gr.Textbox(label="Question")
|
| 300 |
+
with gr.Row():
|
| 301 |
+
ans=gr.TextArea(label="Ground Truth")
|
| 302 |
+
with gr.Row():
|
| 303 |
+
ans=gr.TextArea(label="Contexts")
|
| 304 |
+
with gr.Row():
|
| 305 |
+
with gr.Row():
|
| 306 |
+
type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
|
| 307 |
+
save_test = gr.Button(value="Save")
|
| 308 |
+
with gr.Row():
|
| 309 |
+
lab=gr.Label(visible=False,value="You answer is submitted!!! Thank you for your contribution.",label="Submitted")
|
| 310 |
+
save_test.click(save_the_ques_test,[ques,ans,type_options],None)
|
| 311 |
+
|
| 312 |
+
## Embedding data generation
|
| 313 |
+
def update_fields(loss_function):
    """Reconfigure the three embedding-data inputs and the help markdown to
    match the selected loss function; hide them for an unknown selection.

    Args:
        loss_function (str): Value from the loss-function dropdown.

    Returns:
        tuple: (first_input, second_input, third_input, markdown) component updates.
    """
    # (label, placeholder) triples plus help text per loss function; this
    # table replaces five near-identical if/elif branches.
    specs = {
        "MultipleNegativesRankingLoss": (
            [("Anchor", "The sentence to be embedded."),
             ("Positive", "A sentence semantically similar to the anchor."),
             ("Negative", "A sentence semantically dissimilar to the anchor.")],
            "\n**MultipleNegativesRankingLoss:**\n"
            "Expects data with columns: `anchor`, `positive`, `negative`.\n"
            "- `anchor`: The sentence to be embedded.\n"
            "- `positive`: A sentence semantically similar to the anchor.\n"
            "- `negative`: A sentence semantically dissimilar to the anchor.",
        ),
        "OnlineContrastiveLoss": (
            [("Sentence 1", "The first sentence."),
             ("Sentence 2", "The second sentence."),
             ("Label", "1 if the sentences are similar, 0 if dissimilar.")],
            "\n**OnlineContrastiveLoss:**\n"
            "Expects data with columns: `sentence1`, `sentence2`, `label`.\n"
            "- `sentence1`, `sentence2`: Pairs of sentences.\n"
            "- `label`: 1 if the sentences are similar, 0 if dissimilar.",
        ),
        "CoSENTLoss": (
            [("Sentence 1", "The first sentence."),
             ("Sentence 2", "The second sentence."),
             ("Score", "A float value (e.g., 0-1) representing their similarity.")],
            "\n**CoSENTLoss:**\n"
            "Expects data with columns: `sentence1`, `sentence2`, `score`.\n"
            "- `sentence1`, `sentence2`: Pairs of sentences.\n"
            "- `score`: A float value (e.g., 0-1) representing their similarity.",
        ),
        "GISTEmbedLoss": (
            [("Anchor", "The sentence to be embedded."),
             ("Positive", "A sentence semantically similar to the anchor."),
             ("Negative", "A sentence semantically dissimilar to the anchor. Can be empty.")],
            "\n**GISTEmbedLoss:**\n"
            "Expects data with either:\n"
            "- Columns: `anchor`, `positive`, `negative` (like TripletLoss).\n"
            "- Columns: `anchor`, `positive` (for pairs of similar sentences).",
        ),
        "TripletLoss": (
            [("Anchor", "The sentence to be embedded."),
             ("Positive", "A sentence semantically similar to the anchor."),
             ("Negative", "A sentence semantically dissimilar to the anchor.")],
            "\n**TripletLoss:**\n"
            "Expects data with columns: `anchor`, `positive`, `negative`.\n"
            "- `anchor`: The sentence to be embedded.\n"
            "- `positive`: A sentence semantically similar to the anchor.\n"
            "- `negative`: A sentence semantically dissimilar to the anchor.",
        ),
    }

    if loss_function not in specs:
        # Unknown selection: hide everything (original else-branch behavior).
        return (gr.Textbox(visible=False), gr.Textbox(visible=False),
                gr.Textbox(visible=False), gr.Markdown(visible=False))

    fields, help_text = specs[loss_function]
    boxes = [gr.Textbox(label=lab, visible=True, placeholder=ph) for lab, ph in fields]
    markdown = gr.Markdown(help_text, visible=True)
    return boxes[0], boxes[1], boxes[2], markdown
|
| 383 |
+
|
| 384 |
+
with gr.Tab("Embedding Data Generation"):
|
| 385 |
+
gr.Markdown("**Choose a loss function to format your embedding data.**")
|
| 386 |
+
with gr.Row():
|
| 387 |
+
loss_function = gr.Dropdown(
|
| 388 |
+
choices=[
|
| 389 |
+
"MultipleNegativesRankingLoss",
|
| 390 |
+
"OnlineContrastiveLoss",
|
| 391 |
+
"CoSENTLoss",
|
| 392 |
+
"GISTEmbedLoss",
|
| 393 |
+
"TripletLoss",
|
| 394 |
+
],
|
| 395 |
+
label="Select the loss function",
|
| 396 |
+
)
|
| 397 |
+
with gr.Row():
|
| 398 |
+
gr.Markdown("""Format `data/emb_data.xlsx` or `data/emb_data.csv` to the expected data format, according to the selected loss function.
|
| 399 |
+
If the file exists and has matching columns, new data will be appended.
|
| 400 |
+
Otherwise, the file will be overwritten.""")
|
| 401 |
+
with gr.Row():
|
| 402 |
+
loss_info_markdown = gr.Markdown(visible=False)
|
| 403 |
+
with gr.Row():
|
| 404 |
+
first_input = gr.Textbox(label="Anchor", value="",visible=False)
|
| 405 |
+
second_input = gr.Textbox(label="Positive", value="",visible=False)
|
| 406 |
+
third_input = gr.Textbox(label="Negative", value="",visible=False)
|
| 407 |
+
loss_function.change(update_fields, loss_function, [first_input, second_input, third_input,loss_info_markdown])
|
| 408 |
+
with gr.Row():
|
| 409 |
+
with gr.Row():
|
| 410 |
+
type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
|
| 411 |
+
save_emb = gr.Button(value="Save")
|
| 412 |
+
save_emb.click(save_emb_data,[loss_function,first_input,second_input,third_input,type_options])
|
| 413 |
+
|
| 414 |
+
with gr.Row():
|
| 415 |
+
gr.Markdown("""
|
| 416 |
+
## 3. Data parsing for RAG
|
| 417 |
+
""")
|
| 418 |
+
with gr.Row():
|
| 419 |
+
link_temp=gr.Textbox(label="Enter Link to Parse Data for RAG",info="To provide the link for parsing the data from the website, this link can help create RAG data for the model.")
|
| 420 |
+
parse_data_btn=gr.Button("Parse Data")
|
| 421 |
+
from utils import parse_data
|
| 422 |
+
parse_data_btn.click(parse_data_func,link_temp,link_temp)
|
| 423 |
+
|
| 424 |
+
#***************************************************
|
| 425 |
+
with gr.Tab("Fine-tuning"):
|
| 426 |
+
with gr.Tab("Fine-tune LLM"):
|
| 427 |
+
with gr.Row():
|
| 428 |
+
def login_hug(token):
    """Authenticate this session with the Hugging Face Hub.

    Args:
        token: HF access token (here sourced from the ``HF_TOKEN`` env var).
    """
    # Lazy import keeps the hub dependency out of module import time.
    from huggingface_hub import login as hub_login
    hub_login(token=token)
|
| 431 |
+
login_hug(os.getenv('HF_TOKEN'))
|
| 432 |
+
gr.Markdown("""
|
| 433 |
+
# Instructions:
|
| 434 |
+
- Required VRAM for training: 24GB, for inference: 16GB.(Mistral, Zepyhr and Lllama)\n
|
| 435 |
+
- Required VRAM for training: 5GB, for inference: 4GB.(Phi,Flan-T5)
|
| 436 |
+
- For fine-tuning a custom model select `custom model` option in `Select the model for fine-tuning` dropdown section. The custom model can be configured by editing the code section.\n
|
| 437 |
+
- After fine-tuning the model, it will be saved in the `models` folder.
|
| 438 |
+
""")
|
| 439 |
+
|
| 440 |
+
def edit_model_parameter(model_name_temp,edit_code,code_temp,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout, progress=gr.Progress()):
    """Persist edited fine-tune code (if any) and run fine-tuning for the chosen model.

    Args:
        model_name_temp: Selected model ("Mistral", "Zephyr", "Llama", "Phi",
            "Flant5" or "Custom model").
        edit_code: Truthy when the code editor was opened (button value).
        code_temp: Editor contents to write back before fine-tuning.
        lr, epoch, batch_size, gradient_accumulation, quantization,
        lora_r, lora_alpha, lora_dropout: Hyperparameters forwarded to the trainer.
        progress: Gradio progress tracker.
    """
    progress(0, desc="Fine-tune started!! please wait ...")
    # One source file per editable model; Flant5 had no editable branch originally.
    finetune_scripts = {
        "Mistral": "fine_tune_file/mistral_finetune.py",
        "Zephyr": "fine_tune_file/zepyhr_finetune.py",
        "Llama": "fine_tune_file/llama_finetune.py",
        "Phi": "fine_tune_file/phi_finetune.py",
        "Custom model": "fine_tune_file/finetune_file.py",
    }
    # Write edited code to disk first so the imports below pick up the new code.
    if edit_code and len(code_temp) != 0 and model_name_temp in finetune_scripts:
        # `with` guarantees the handle is closed (the original leaked it).
        with open(finetune_scripts[model_name_temp], "w") as fh:
            fh.write(code_temp)
    # Import just before fine-tuning, to ensure the latest saved code is used.
    from fine_tune_file.finetune_file import custom_model_trainer
    from fine_tune_file.modular_finetune import get_trainer

    if model_name_temp == "Custom model":
        gr.Info("Fine-tune started!!!")
        trainer = custom_model_trainer()
        trainer.custom_model_finetune()
        gr.Info("Fine-tune Ended!!!")
    else:
        trainer = get_trainer(model_name_temp)
        gr.Info("Fine-tune started!!!")
        hyperparams = (lr, epoch, batch_size, gradient_accumulation,
                       quantization, lora_r, lora_alpha, lora_dropout)
        if model_name_temp == "Mistral":
            trainer.mistral_finetune(*hyperparams)
        elif model_name_temp == "Zephyr":
            trainer.zepyhr_finetune(*hyperparams)
        elif model_name_temp == "Llama":
            trainer.llama_finetune(*hyperparams)
        elif model_name_temp == "Phi":
            trainer.phi_finetune(*hyperparams)
        elif model_name_temp == "Flant5":
            trainer.flant5_finetune(*hyperparams)
        gr.Info("Fine-tune Ended!!!")
|
| 483 |
+
|
| 484 |
+
def code_show(model_name):
    """Load the selected model's fine-tune script into an editable code view.

    Args:
        model_name: One of the supported model names.

    Returns:
        A visible, editable ``gr.Code`` component holding the script source,
        or None for an unrecognized name (matching the original behaviour).
    """
    script_paths = {
        "Mistral": "fine_tune_file/mistral_finetune.py",
        "Zephyr": "fine_tune_file/zepyhr_finetune.py",
        "Llama": "fine_tune_file/llama_finetune.py",
        "Phi": "fine_tune_file/phi_finetune.py",
        "Flant5": "fine_tune_file/flant5_finetune.py",
    }
    path = script_paths.get(model_name)
    if path is None:
        return None
    # Context manager closes the file (the original `open(...).read()` leaked it).
    with open(path) as fh:
        source = fh.read()
    return gr.Code(visible=True, value=source, interactive=True, language="python")
|
| 500 |
+
|
| 501 |
+
def custom_model(model_name):  # It shows custom model code in the UI.
    """Show the custom-model fine-tune code when "Custom model" is selected.

    Returns:
        [code_component, button_component] updates: the editor becomes visible
        (and the edit button hidden) only for the custom model.
    """
    if model_name == "Custom model":
        # `with` closes the handle; the original `open(...).read()` leaked it.
        with open(r"fine_tune_file/finetune_file.py") as fh:
            source = fh.read()
        return [gr.Code(visible=True, value=source, interactive=True, language="python"),
                gr.Button(visible=False)]
    else:
        return [gr.Code(visible=False), gr.Button("Advance Code Editing", visible=True)]
|
| 507 |
+
def change_code_fun(code_, model_name):
    """Write edited fine-tune code back to the selected model's script file.

    Args:
        code_: The editor contents to persist.
        model_name: Selected model; unknown names are a no-op (as before).
    """
    script_paths = {
        "Mistral": "fine_tune_file/mistral_finetune.py",
        "Zephyr": "fine_tune_file/zepyhr_finetune.py",
        "Llama": "fine_tune_file/llama_finetune.py",
        "Phi": "fine_tune_file/phi_finetune.py",
        "Flant5": "fine_tune_file/flant5_finetune.py",
    }
    path = script_paths.get(model_name)
    if path is None:
        return
    # Context manager guarantees the write is flushed and the handle closed
    # (the original `open(...,"w").write(...)` leaked the handle).
    with open(path, "w") as fh:
        fh.write(code_)
    gr.Info("Successfully saved code!!!")
|
| 523 |
+
|
| 524 |
+
def finetune_emb(model_name, loss_name, epochs = 1, batch_size = 8):
    """Fine-tune the selected sentence-embedding model with the chosen loss.

    Args:
        model_name: Embedding model identifier.
        loss_name: Loss function name understood by EmbeddingFinetuner.
        epochs: Number of training passes over the data.
        batch_size: Training batch size.
    """
    gr.Info("Embedding model fine-tune is started!!!")
    # Imported lazily so the heavy training stack loads only on demand.
    from embedding_tuner import EmbeddingFinetuner
    tuner = EmbeddingFinetuner(
        model_name=model_name,
        loss_function=loss_name,
        epochs=epochs,
        batch_size=batch_size,
    )
    if tuner.train():
        gr.Info("Embedding model fine-tune finished!!!")
|
| 536 |
+
|
| 537 |
+
with gr.Row():
|
| 538 |
+
code_temp=gr.Code(visible=False)
|
| 539 |
+
with gr.Row():
|
| 540 |
+
model_name=gr.Dropdown(choices=["Mistral","Zephyr","Llama","Phi","Flant5","Custom model"],label="Select the LLM for fine-tuning")
|
| 541 |
+
with gr.Accordion("Parameter Setup"):
|
| 542 |
+
with gr.Row():
|
| 543 |
+
lr=gr.Number(label="learning_rate",value=5e-6,interactive=True,info="The step size at which the model parameters are updated during training. It controls the magnitude of the updates to the model's weights.")
|
| 544 |
+
epoch=gr.Number(label="epochs",value=2,interactive=True,info="One complete pass through the entire training dataset during the training process. It's a measure of how many times the algorithm has seen the entire dataset.")
|
| 545 |
+
batch_size=gr.Number(label="batch_size",value=4,interactive=True,info="The number of training examples used in one iteration of training. It affects the speed and stability of the training process.")
|
| 546 |
+
gradient_accumulation = gr.Number(info="Gradient accumulation involves updating model weights after accumulating gradients over multiple batches, instead of after each individual batch.",label="gradient_accumulation",value=4,interactive=True)
|
| 547 |
+
with gr.Row():
|
| 548 |
+
quantization = gr.Dropdown(info="Quantization is a technique used to reduce the precision of numerical values, typically from 32-bit floating-point numbers to lower bit representations.",label="quantization",choices=[4,8],value=8,interactive=True)
|
| 549 |
+
lora_r = gr.Number(info="LoRA_r is a hyperparameter associated with the rank of the low-rank approximation used in LoRA.",label="lora_r",value=16,interactive=True)
|
| 550 |
+
lora_alpha = gr.Number(info="LoRA_alpha is a hyperparameter used in LoRA for controlling the strength of the adaptation.",label="lora_alpha",value=32,interactive=True)
|
| 551 |
+
lora_dropout = gr.Number(info="LoRA_dropout is a hyperparameter used in LoRA to control the dropout rate during fine-tuning.",label="lora_dropout",value=.05,interactive=True)
|
| 552 |
+
with gr.Row():
|
| 553 |
+
edit_code=gr.Button("Advance Code Editing")
|
| 554 |
+
with gr.Row():
|
| 555 |
+
code_temp=gr.Code(visible=False)
|
| 556 |
+
with gr.Row():
|
| 557 |
+
parameter_alter=gr.Button("Fine-tune")
|
| 558 |
+
with gr.Row():
|
| 559 |
+
fin_com=gr.Label(visible=False)
|
| 560 |
+
edit_code.click(code_show,model_name,code_temp)
|
| 561 |
+
# On click finetune button
|
| 562 |
+
parameter_alter.click(edit_model_parameter,[model_name,edit_code,code_temp,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout],model_name)
|
| 563 |
+
model_name.change(custom_model,model_name,[code_temp,edit_code])
|
| 564 |
+
with gr.Tab("Embedding model"):
|
| 565 |
+
with gr.Row():
|
| 566 |
+
embedding_model = gr.Dropdown(
|
| 567 |
+
choices=[
|
| 568 |
+
"BAAI/bge-base-en-v1.5",
|
| 569 |
+
"dunzhang/stella_en_1.5B_v5",
|
| 570 |
+
"dunzhang/stella_en_400M_v5",
|
| 571 |
+
"nvidia/NV-Embed-v2",
|
| 572 |
+
"Alibaba-NLP/gte-Qwen2-1.5B-instruct",
|
| 573 |
+
],
|
| 574 |
+
label="Select the embedding model for fine-tuning",
|
| 575 |
+
)
|
| 576 |
+
loss_function = gr.Dropdown(
|
| 577 |
+
choices=[
|
| 578 |
+
"MultipleNegativesRankingLoss",
|
| 579 |
+
"OnlineContrastiveLoss",
|
| 580 |
+
"CoSENTLoss",
|
| 581 |
+
"GISTEmbedLoss",
|
| 582 |
+
"TripletLoss",
|
| 583 |
+
],
|
| 584 |
+
label="Select the loss function",
|
| 585 |
+
)
|
| 586 |
+
|
| 587 |
+
epoch=gr.Number(label="epochs",value=1,interactive=True,info="One complete pass through the entire training dataset during the training process.")
|
| 588 |
+
batch_size=gr.Number(label="batch_size",value=8,interactive=True,info="The number of training examples used in one iteration of training.")
|
| 589 |
+
with gr.Row():
|
| 590 |
+
btn_emb = gr.Button("Fine-tune the embedding model")
|
| 591 |
+
|
| 592 |
+
|
| 593 |
+
# with gr.Row():
|
| 594 |
+
# with gr.Accordion(label="Expected data format according to loss function"):
|
| 595 |
+
# loss_info = gr.Markdown(
|
| 596 |
+
# """
|
| 597 |
+
# # Expected data format according to loss function:
|
| 598 |
+
# ### Format `data/emb_data.xlsx` | `data/emb_data.xlsx` accordingly.
|
| 599 |
+
|
| 600 |
+
# **MultipleNegativesRankingLoss:**
|
| 601 |
+
# Expects data with columns: `anchor`, `positive`, `negative`.
|
| 602 |
+
# - `anchor`: The sentence to be embedded.
|
| 603 |
+
# - `positive`: A sentence semantically similar to the anchor.
|
| 604 |
+
# - `negative`: A sentence semantically dissimilar to the anchor.
|
| 605 |
+
|
| 606 |
+
# **OnlineContrastiveLoss:**
|
| 607 |
+
# Expects data with columns: `sentence1`, `sentence2`, `label`.
|
| 608 |
+
# - `sentence1`, `sentence2`: Pairs of sentences.
|
| 609 |
+
# - `label`: 1 if the sentences are similar, 0 if dissimilar.
|
| 610 |
+
|
| 611 |
+
# **CoSENTLoss:**
|
| 612 |
+
# Expects data with columns: `sentence1`, `sentence2`, `score`.
|
| 613 |
+
# - `sentence1`, `sentence2`: Pairs of sentences.
|
| 614 |
+
# - `score`: A float value (e.g., 0-1) representing their similarity.
|
| 615 |
+
|
| 616 |
+
# **GISTEmbedLoss:**
|
| 617 |
+
# Expects data with either:
|
| 618 |
+
# - Columns: `anchor`, `positive`, `negative` (like TripletLoss).
|
| 619 |
+
# - Columns: `anchor`, `positive` (for pairs of similar sentences).
|
| 620 |
+
|
| 621 |
+
# **TripletLoss:**
|
| 622 |
+
# Expects data with columns: `anchor`, `positive`, `negative`.
|
| 623 |
+
# - `anchor`: The sentence to be embedded.
|
| 624 |
+
# - `positive`: A sentence semantically similar to the anchor.
|
| 625 |
+
# - `negative`: A sentence semantically dissimilar to the anchor.
|
| 626 |
+
# """
|
| 627 |
+
# )
|
| 628 |
+
|
| 629 |
+
|
| 630 |
+
|
| 631 |
+
btn_emb.click(finetune_emb,[embedding_model, loss_function, epoch, batch_size], None)
|
| 632 |
+
#***************************************************
|
| 633 |
+
with gr.Tab("Testing Data Generation and RAG Customization"):
|
| 634 |
+
from utils import save_params_to_file
|
| 635 |
+
def ans_gen_fun(model_name_local,model_name_online,embedding_name,
                splitter_type_dropdown,chunk_size_slider,
                chunk_overlap_slider,separator_textbox,max_tokens_slider,save_as_fav,progress=gr.Progress()):
    """Generate model answers for the testing dataset and plot RAG metrics.

    Reads ``data/testing_dataset.xlsx``, answers every question through the
    RAG chain, scores the results, saves them under ``model_ans/`` and returns
    a bar plot component.
    """
    if not os.path.exists(os.path.join("data", "testing_dataset.xlsx")):
        gr.Warning("You need to create testing dataset first from Data collection.")
        return
    if save_as_fav:
        # Persist the current RAG settings as the user's favorites.
        save_params_to_file(model_name_local, embedding_name,
                            splitter_type_dropdown, chunk_size_slider,
                            chunk_overlap_slider, separator_textbox, max_tokens_slider)
    from model_ret import calculate_rag_metrics
    progress(0, desc="Starting...")
    model_ques_ans_gen = []
    df_temp = pd.read_excel(r"data/testing_dataset.xlsx")
    infer_model = model_chain(model_name_local, model_name_online,
                              True, embedding_name, splitter_type_dropdown, chunk_size_slider,
                              chunk_overlap_slider, separator_textbox, max_tokens_slider)
    print("Processing test dataset...")
    # Rows are expected as (question, ground_truth); enumerate replaces the
    # hand-maintained idx counter of the original.
    for idx, row in enumerate(progress.tqdm(df_temp.values), start=1):
        model_ques_ans_gen.append({
            "id": idx,
            "question": row[0],
            'answer': infer_model.ans_ret(row[0]),
            "ground_truths": row[1]
        })
    print("Done processing test dataset!")
    model_name = infer_model.model_name.split('/')[-1]
    temp = calculate_rag_metrics(model_ques_ans_gen, model_name)
    pd.DataFrame(temp).to_excel(os.path.join("model_ans", f"_{model_name+cur_time}.xlsx"), index=False)
    rag_metrics = ['answer_correctness', 'answer_similarity', 'answer_relevancy', 'faithfulness', 'context_recall', 'context_precision']
    # NOTE(review): the plotted value is a hard-coded 0.2 placeholder, not the
    # computed metrics in `temp` -- confirm whether real averages should be shown.
    new_df = pd.DataFrame({'Rag Metric': rag_metrics, 'Average Rating': 0.2})
    # Bug fix: this notification used to sit *after* the return and never ran.
    gr.Info("Generating answer from model is finished!!! Now, it is ready for human evaluation. Model answer is saved in \"model_ans\" folder. ")
    return gr.BarPlot(
        new_df,
        x="Rag Metric",
        y="Average Rating",
        x_title="Rag Metric",
        y_title="Average Rating",
        title="RAG performance",
        tooltip=["Rag Metric", "Average Rating"],
        y_lim=[1, 200],
        width=150,
        visible=True
    )
|
| 690 |
+
|
| 691 |
+
gr.Markdown(""" # Instructions:\n
|
| 692 |
+
In this page you can generate answer from fine-tuned models for human evaluation. The questions must be created using `Testing data generation` section of `Data collection` tab.
|
| 693 |
+
""")
|
| 694 |
+
with gr.Row():
|
| 695 |
+
embedding_name=gr.Dropdown(choices=["BAAI/bge-base-en-v1.5","dunzhang/stella_en_1.5B_v5","dunzhang/stella_en_400M_v5",
|
| 696 |
+
"nvidia/NV-Embed-v2","Alibaba-NLP/gte-Qwen2-1.5B-instruct"],
|
| 697 |
+
label="Select the Embedding Model")
|
| 698 |
+
splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
|
| 699 |
+
value="character", label="Splitter Type",interactive=True)
|
| 700 |
+
|
| 701 |
+
chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=500, step=50, label="Chunk Size")
|
| 702 |
+
chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=30, step=10, label="Chunk Overlap",interactive=True)
|
| 703 |
+
separator_textbox = gr.Textbox(value="\n", label="Separator (e.g., newline '\\n')",interactive=True)
|
| 704 |
+
max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=1000, step=100, label="Max Tokens",interactive=True)
|
| 705 |
+
with gr.Row():
|
| 706 |
+
save_as_fav=gr.Checkbox(label="Save this settings as favorite")
|
| 707 |
+
inf_checkbox=gr.Checkbox(label="Do you want to use without fine-tuned model from Hugging face?")
|
| 708 |
+
model_name_local=gr.Dropdown(visible=False)
|
| 709 |
+
model_name_online=gr.Dropdown(visible=False)
|
| 710 |
+
def model_online_local_show(inf_checkbox):
    """Toggle between the local fine-tuned model picker and the HF hub picker.

    Returns updates for [local_dropdown, online_dropdown]: exactly one of the
    two dropdowns is visible at a time.
    """
    if not inf_checkbox:
        # Local mode: list the fine-tuned checkpoints saved under `models/`.
        return [gr.Dropdown(choices=os.listdir("models"), label="Select the fine-tuned LLM", visible=True),
                gr.Dropdown(visible=False)]
    # Hub mode: offer the predefined hub model list instead.
    return [gr.Dropdown(visible=False),
            gr.Dropdown(choices=llama_models,
                        label="Select the LLM from Huggingface", visible=True)]
|
| 718 |
+
inf_checkbox.change(model_online_local_show,[inf_checkbox],[model_name_local,model_name_online])
|
| 719 |
+
|
| 720 |
+
with gr.Row():
|
| 721 |
+
ans_gen=gr.Button("Generate Answer for Testing Dataset")
|
| 722 |
+
with gr.Row():
|
| 723 |
+
plot = gr.BarPlot(visible=False)
|
| 724 |
+
ans_gen.click(ans_gen_fun,[model_name_local,model_name_online,embedding_name,
|
| 725 |
+
splitter_type_dropdown,chunk_size_slider,
|
| 726 |
+
chunk_overlap_slider,separator_textbox,max_tokens_slider,save_as_fav],plot)
|
| 727 |
+
#***************************************************Human evaluation
|
| 728 |
+
import secrets
|
| 729 |
+
|
| 730 |
+
def generate_token():
    """Create a fresh 12-hex-char evaluator token and an empty answer sheet.

    Loops until a token not already present in ``save_ques_ans/`` is found,
    then writes an empty xlsx named after it.

    Returns:
        A visible ``gr.Label`` showing the new token.
    """
    while True:
        token = secrets.token_hex(6)
        # splitext is robust to any extension (the original sliced off 5 chars,
        # silently assuming every file ends in ".xlsx").
        existing = {os.path.splitext(name)[0] for name in os.listdir("save_ques_ans")}
        if token not in existing:
            df = pd.DataFrame({'id': [], 'question': [], 'answer': []})
            # os.path.join instead of the hand-built "save_ques_ans//" path.
            df.to_excel(os.path.join("save_ques_ans", f"{token}.xlsx"), index=False)
            return gr.Label(label="Please keep the token for tracking question answer data", value=token, visible=True)
|
| 743 |
+
|
| 744 |
+
def bar_plot_fn():
    """Build a bar chart of average human ratings per model.

    Data comes from `score_report_bar()` which aggregates the saved
    score-report spreadsheets.
    """
    ratings = score_report_bar()
    plot_config = dict(
        x="Model Name",
        y="Average Rating",
        x_title="Model name",
        y_title="Average Rating",
        title="Model performance",
        tooltip=["Model Name", "Average Rating"],
        y_lim=[1, 200],
        width=150,
        visible=True,
    )
    return gr.BarPlot(ratings, **plot_config)
|
| 759 |
+
with gr.Tab("Human Evaluation"):
|
| 760 |
+
def answer_updated(model_ans):
    """Load the selected model-answer sheet and display its first Q/A pair.

    Args:
        model_ans: File name of the answer sheet inside ``model_ans/``.

    Returns:
        Updates for [summary_markdown, id, question, answer, model_dropdown,
        submit_button]; the dropdown and button are hidden once a sheet is
        loaded.
    """
    df_ques_ans = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    num = 0  # evaluation always starts at the first row
    # (the original also printed the first id here for debugging; removed)
    return [gr.Markdown(value=f"""# Model_name: {model_ans}
# Number of questions: {len(df_ques_ans)}""",visible=True),
            gr.Label(value=str(df_ques_ans['id'][num])),
            gr.Label(value=str(df_ques_ans['question'][num])),
            gr.Label(value=str(df_ques_ans['answer'][num])),
            gr.Dropdown(visible=False),
            gr.Button(visible=False)
            ]
|
| 772 |
+
|
| 773 |
+
with gr.Row():
|
| 774 |
+
new_user=gr.Button("New User")
|
| 775 |
+
with gr.Row():
|
| 776 |
+
new_user_token=gr.Label(visible=False)
|
| 777 |
+
with gr.Row():
|
| 778 |
+
token_key=gr.Textbox(label="Enter your Token")
|
| 779 |
+
model_ans=gr.Dropdown(choices=os.listdir("model_ans"),label="Select the Model Answer for Human Evaluation")
|
| 780 |
+
btn_1=gr.Button("Submit")
|
| 781 |
+
gr.Markdown(""" # Instructions:
|
| 782 |
+
In this section, humans evaluate the answers of the model given specific questions. Each answer is rated between 1 and 5 by anonymous students.
|
| 783 |
+
Those values are saved in the `scrore_report` folder.
|
| 784 |
+
""")
|
| 785 |
+
lab_temp=gr.Markdown(visible=False)
|
| 786 |
+
|
| 787 |
+
with gr.Row():
|
| 788 |
+
id=gr.Label(value="",label="ID")
|
| 789 |
+
with gr.Row():
|
| 790 |
+
ques=gr.Label(value="",label="Question")
|
| 791 |
+
with gr.Row():
|
| 792 |
+
ans=gr.Label(value="",label="Answer")
|
| 793 |
+
with gr.Row():
|
| 794 |
+
score = gr.Radio(choices=[1,2,3,4,5],label="Rating")
|
| 795 |
+
with gr.Row():
|
| 796 |
+
human_ans_btn=gr.Button("Show Answer From Other Evaluators")
|
| 797 |
+
with gr.Row():
|
| 798 |
+
human_ans_lab=gr.Label(label="Human Answer",visible=False)
|
| 799 |
+
with gr.Row():
|
| 800 |
+
btn = gr.Button("Save")
|
| 801 |
+
question = gr.Button("Skip")
|
| 802 |
+
# with gr.Row():
|
| 803 |
+
# save_all_btn=gr.Button("Save all the data in dataframe")
|
| 804 |
+
# with gr.Row():
|
| 805 |
+
# move=gr.Number(label="Move to the question")
|
| 806 |
+
# move_btn=gr.Button("move")
|
| 807 |
+
with gr.Row():
|
| 808 |
+
btn_plot=gr.Button("Plot Generation")
|
| 809 |
+
with gr.Row():
|
| 810 |
+
plot = gr.BarPlot(visible=False)
|
| 811 |
+
btn_plot.click(bar_plot_fn, None, outputs=plot)
|
| 812 |
+
btn_1.click(answer_updated,model_ans,[lab_temp,id,ques,ans,model_ans,btn_1])
|
| 813 |
+
btn.click(score_save, inputs=[ques,ans,score,model_ans,token_key], outputs=[id,ques,ans])
|
| 814 |
+
question.click(new_ques,model_ans,[id,ques,ans])
|
| 815 |
+
# save_all_btn.click(save_all,model_ans,None)
|
| 816 |
+
# move_btn.click(move_to,[move,model_ans],[id,ques,ans])
|
| 817 |
+
def human_ans_func(id, ques):
    """Reveal every evaluator's saved answer for the current question."""
    contributions = all_contri_ans(id, ques)
    joined = "\n".join(contributions)
    return [gr.Button("Show Answer from Other Evaluators"),
            gr.Label(value=joined, visible=True)]
|
| 820 |
+
human_ans_btn.click(human_ans_func,[id, ques],[human_ans_btn,human_ans_lab])
|
| 821 |
+
new_user.click(generate_token,None,new_user_token)
|
| 822 |
+
|
| 823 |
+
#***************************************************
|
| 824 |
+
infer_ragchain=None
|
| 825 |
+
with gr.Tab("Inference"):
|
| 826 |
+
def echo(message, history,model_name_local,model_name_online,
         inf_checkbox,embedding_name,splitter_type_dropdown,chunk_size_slider,
         chunk_overlap_slider,separator_textbox,max_tokens_slider):
    """Chat handler: lazily build the RAG chain once, then answer the message.

    The chain is cached in the module-level `infer_ragchain` so the model is
    loaded only on the first message of a session.
    """
    global infer_ragchain
    if infer_ragchain is None:
        gr.Info("Please wait!!! model is loading!!")
        if inf_checkbox:
            gr.Info("Model is loading from Huggingface!!")
        infer_ragchain = model_chain(model_name_local, model_name_online,
                                     inf_checkbox, embedding_name,
                                     splitter_type_dropdown, chunk_size_slider,
                                     chunk_overlap_slider, separator_textbox,
                                     max_tokens_slider)
    return infer_ragchain.ans_ret(message)
|
| 839 |
+
from utils import load_params_from_file
|
| 840 |
+
saved_params = load_params_from_file()
|
| 841 |
+
# If saved parameters exist, use them; otherwise, set default values
|
| 842 |
+
default_model_name = saved_params['model_name'] if saved_params else "Llama"
|
| 843 |
+
default_embedding_name = saved_params['embedding_name'] if saved_params else "BAAI/bge-base-en-v1.5"
|
| 844 |
+
default_splitter_type = saved_params['splitter_type_dropdown'] if saved_params else "character"
|
| 845 |
+
default_chunk_size = saved_params['chunk_size_slider'] if saved_params else 500
|
| 846 |
+
default_chunk_overlap = saved_params['chunk_overlap_slider'] if saved_params else 30
|
| 847 |
+
default_separator = saved_params['separator_textbox'] if saved_params else "\n"
|
| 848 |
+
default_max_tokens = saved_params['max_tokens_slider'] if saved_params else 1000
|
| 849 |
+
# with gr.Row():
|
| 850 |
+
with gr.Row():
|
| 851 |
+
def login_hug(token):
    """Authenticate with the Hugging Face Hub using *token*."""
    # Import on demand so the hub client is only needed when logging in.
    from huggingface_hub import login as hub_login
    hub_login(token=token)
|
| 854 |
+
login_hug(os.getenv('HF_TOKEN'))
|
| 855 |
+
embedding_name=gr.Dropdown(choices=["BAAI/bge-base-en-v1.5","dunzhang/stella_en_1.5B_v5","dunzhang/stella_en_400M_v5",
|
| 856 |
+
"nvidia/NV-Embed-v2","Alibaba-NLP/gte-Qwen2-1.5B-instruct"],value=default_embedding_name,
|
| 857 |
+
label="Select the Embedding Model")
|
| 858 |
+
splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
|
| 859 |
+
value=default_splitter_type, label="Splitter Type",interactive=True)
|
| 860 |
+
|
| 861 |
+
chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=default_chunk_size, step=50, label="Chunk Size")
|
| 862 |
+
chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=default_chunk_overlap, step=10, label="Chunk Overlap",interactive=True)
|
| 863 |
+
separator_textbox = gr.Textbox(value=default_separator, label="Separator (e.g., newline '\\n')",interactive=True)
|
| 864 |
+
max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=default_max_tokens, step=100, label="Max Tokens",interactive=True)
|
| 865 |
+
|
| 866 |
+
inf_checkbox=gr.Checkbox(label="Do you want to use without fine-tuned model from Hugging face?")
|
| 867 |
+
model_name_local=gr.Dropdown(visible=False)
|
| 868 |
+
model_name_online=gr.Dropdown(visible=False)
|
| 869 |
+
def model_online_local_show(inf_checkbox):
    """Show either the local fine-tuned model dropdown or the HF hub dropdown.

    Exactly one of the two returned dropdown updates is visible, depending on
    whether the "use model from Hugging Face" checkbox is ticked.
    """
    if not inf_checkbox:
        local_picker = gr.Dropdown(choices=os.listdir("models"),
                                   label="Select the fine-tuned LLM", visible=True)
        return [local_picker, gr.Dropdown(visible=False)]
    hub_picker = gr.Dropdown(choices=llama_models,
                             label="Select the LLM from Huggingface", visible=True)
    return [gr.Dropdown(visible=False), hub_picker]
|
| 877 |
+
|
| 878 |
+
inf_checkbox.change(model_online_local_show,[inf_checkbox],[model_name_local,model_name_online])
|
| 879 |
+
gr.ChatInterface(fn=echo,
|
| 880 |
+
additional_inputs=[model_name_local,model_name_online,inf_checkbox,embedding_name,
|
| 881 |
+
splitter_type_dropdown,chunk_size_slider,
|
| 882 |
+
chunk_overlap_slider,separator_textbox,max_tokens_slider],
|
| 883 |
+
title="Chatbot")
|
| 884 |
+
#----------------------------------------------
|
| 885 |
+
with gr.Tab("Deployment"):
|
| 886 |
+
gr.Markdown("""`deploy` folder has all the code for the deployment of the model.
|
| 887 |
+
For installing dependencies use the following command: `pip install -r requirements.txt`.
|
| 888 |
+
""")
|
| 889 |
+
def deploy_func(model_name):
    """Prepare the `deploy` folder: copy inference code and record the model.

    Args:
        model_name: Name of the fine-tuned model folder to deploy.
    """
    import shutil
    import os
    src_folder = 'src'
    deploy_folder = 'deploy'
    files_to_copy = ['model_ret.py', 'create_retriever.py', 'inference.py']
    for file_name in files_to_copy:
        src_file_path = os.path.join(src_folder, file_name)
        dest_file_path = os.path.join(deploy_folder, file_name)
        # Bug fix: the original computed these paths (and imported shutil) but
        # never copied anything. Copy best-effort, skipping missing sources.
        if os.path.exists(src_file_path):
            shutil.copy(src_file_path, dest_file_path)
    # Persist the selected model once (the original re-saved inside the loop).
    param_list = load_params_from_file()
    param_list["model_name"] = model_name
    # NOTE(review): elsewhere in this file save_params_to_file is called with
    # individual fields; confirm it also accepts a params dict as used here.
    save_params_to_file(param_list)
|
| 902 |
+
|
| 903 |
+
|
| 904 |
+
model_name=gr.Dropdown(choices=os.listdir("models"),label="Select the Model")
|
| 905 |
+
btn_model=gr.Button("Deploy")
|
| 906 |
+
btn_model.click(deploy_func,model_name)
|
| 907 |
+
|
| 908 |
+
demo.launch(share=True, debug=True)
|
| 909 |
+
save_ques_ans=[]
|
| 910 |
+
save_ques_ans_test=[]
|
| 911 |
+
cur_time=current_time()
|
| 912 |
+
|
| 913 |
+
def random_ques_ans(model_ans):
    """Return the next (question, answer, id, more_flag) from an answer sheet.

    Uses the module-level cursor `cnt`; the flag is 0 when the cursor wrapped
    back to the start (sheet exhausted), 1 otherwise.
    """
    global cnt
    frame = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    row = frame.loc[cnt]
    row_id = int(row['id'])
    question_text = row['question']
    answer_text = row['answer']
    cnt += 1
    if cnt >= len(frame):
        cnt = 0  # wrap around and signal the end of the sheet
        return question_text, answer_text, row_id, 0
    return question_text, answer_text, row_id, 1
|
| 924 |
+
def save_all(model_ans):
    """Dump every collected rating (module-level `data`) to a score report.

    Args:
        model_ans: Name of the evaluated answer sheet; used in the file name.
    """
    temp = pd.DataFrame(data)
    # os.path.join instead of a hard-coded Windows "\\" separator (portability fix).
    temp.to_excel(os.path.join("score_report", f"{model_ans+cur_time}.xlsx"), index=False)
    gr.Info("Sucessfully save all the answer!!!")
|
| 928 |
+
|
| 929 |
+
def score_save(ques,ans,score,model_ans,token_key):
    """Record one human rating, persist the score sheet, and load the next Q/A.

    Args:
        ques, ans: The question/answer pair that was rated.
        score: Rating (1-5).
        model_ans: Answer-sheet file name; used in the report file name.
        token_key: Evaluator token (currently unused here; kept for the
            Gradio wiring that passes it in).

    Returns:
        Label updates for [id, question, answer] showing the next item.
    """
    data.append({
        "question": ques,
        'answer': ans,
        'rating': score
    })
    temp = pd.DataFrame(data)
    # os.path.join instead of a hard-coded Windows "\\" separator (portability fix).
    temp.to_excel(os.path.join("score_report", f"{model_ans+cur_time}.xlsx"), index=False)
    gr.Info("Sucessfully saved in local folder!!!")
    ques_temp, ans_temp, id, flag = random_ques_ans(model_ans)
    gr.Info("Your opinion is submitted successfully!!!")
    return gr.Label(value=id,label="ID"),gr.Label(value=ques_temp, label="Question"), gr.Label(value=ans_temp, label="Answer")
|
| 942 |
+
|
| 943 |
+
def new_ques(model_ans):
    """Skip to the next question/answer pair without recording a rating.

    Returns a dict of component updates keyed by the `id`, `ques` and `ans`
    components defined in the Human Evaluation tab.
    """
    question_text, answer_text, next_id, _flag = random_ques_ans(model_ans)
    return {
        id: gr.Label(value=next_id, label="ID"),
        ques: gr.Label(value=question_text, label="Question"),
        ans: gr.Label(value=answer_text, label="Answer"),
    }
|
| 950 |
+
|
| 951 |
+
def save_the_ques(ques, ans, file_type='xlsx'):
    """Append a question/answer pair to ``data/finetune_data.<file_type>``.

    Args:
        ques (str): The question.
        ans (str): The answer.
        file_type (str, optional): "xlsx" or "csv". Defaults to "xlsx".

    Returns:
        A visible success label.
    """
    fresh = pd.DataFrame({"question": [ques], "answer": [ans]})
    filepath = f"data/finetune_data.{file_type}"

    # Append to an existing sheet when present, otherwise start a new one.
    if Path(filepath).is_file():
        if file_type == "xlsx":
            existing = pd.read_excel(filepath)
        else:
            existing = pd.read_csv(filepath)
        combined = pd.concat([existing, fresh], ignore_index=True)
    else:
        combined = fresh

    if file_type == "xlsx":
        combined.to_excel(filepath, index=False)
    elif file_type == "csv":
        combined.to_csv(filepath, index=False)

    return gr.Label(value="Successfully saved in local folder.", visible=True)
|
| 982 |
+
|
| 983 |
+
def save_the_ques_test(ques, ans, file_type = 'xlsx'):
    """Append a question/answer pair to ``data/testing_data.<file_type>``.

    Args:
        ques (str): The question.
        ans (str): The answer.
        file_type (str, optional): "xlsx" or "csv". Defaults to "xlsx".

    Returns:
        A visible success label.

    NOTE(review): this writes `data/testing_data.*` while the answer generator
    reads `data/testing_dataset.xlsx` -- confirm the intended file name.
    """
    fresh = pd.DataFrame({"question": [ques], "answer": [ans]})
    filepath = f"data/testing_data.{file_type}"

    # Append when the file already exists; otherwise create it fresh.
    if Path(filepath).is_file():
        if file_type == "xlsx":
            existing = pd.read_excel(filepath)
        else:
            existing = pd.read_csv(filepath)
        combined = pd.concat([existing, fresh], ignore_index=True)
    else:
        combined = fresh

    if file_type == "xlsx":
        combined.to_excel(filepath, index=False)
    elif file_type == "csv":
        combined.to_csv(filepath, index=False)

    return gr.Label(value="Successfully saved in local folder.", visible=True)
|
| 1014 |
+
|
| 1015 |
+
import pandas as pd
|
| 1016 |
+
from pathlib import Path
|
| 1017 |
+
|
| 1018 |
+
def save_emb_data(loss_function, first_input, second_input, third_input, file_type="xlsx"):
    """
    Saves embedding training data shaped for the specified loss function to
    either an Excel file (xlsx) or a CSV file (csv).

    Args:
        loss_function (str): The name of the loss function; determines the
            column names of the saved row.
        first_input: The first input value (anchor / sentence1).
        second_input: The second input value (positive / sentence2).
        third_input: The third input value (negative / label / score).
        file_type (str, optional): The file type to save to ("xlsx" or "csv").
            The UI dropdown supplies "Save xlsx" / "Save csv"; both forms are
            accepted. Defaults to "xlsx".

    Returns:
        str: A success message indicating whether data was appended, a new
        file was created, or the loss function was not recognised.
    """
    # Map each supported loss function to the column names its data expects.
    # (MultipleNegativesRankingLoss, GISTEmbedLoss and TripletLoss share the
    # anchor/positive/negative triplet layout.)
    columns_by_loss = {
        "MultipleNegativesRankingLoss": ("anchor", "positive", "negative"),
        "OnlineContrastiveLoss": ("sentence1", "sentence2", "label"),
        "CoSENTLoss": ("sentence1", "sentence2", "score"),
        "GISTEmbedLoss": ("anchor", "positive", "negative"),
        "TripletLoss": ("anchor", "positive", "negative"),
    }
    if loss_function not in columns_by_loss:
        # Previously an unrecognised loss function left `data` unbound and
        # crashed with NameError; fail gracefully instead.
        return f"Unknown loss function: {loss_function!r}. No data saved."

    cols = columns_by_loss[loss_function]
    data = pd.DataFrame({
        cols[0]: [first_input],
        cols[1]: [second_input],
        cols[2]: [third_input],
    })

    # The dropdown passes "Save xlsx" / "Save csv"; normalize to the bare
    # extension so the filename stays valid (was "emb_data.Save xlsx").
    file_type = file_type.split()[-1].lower()

    filepath = f"data/emb_data.{file_type}"

    try:
        if file_type == "xlsx":
            existing_data = pd.read_excel(filepath)
        elif file_type == "csv":
            existing_data = pd.read_csv(filepath)

        if list(data.columns) == list(existing_data.columns):
            # Same schema: append the new row.
            combined_data = pd.concat([existing_data, data], ignore_index=True)
            if file_type == "xlsx":
                combined_data.to_excel(filepath, index=False)
            elif file_type == "csv":
                combined_data.to_csv(filepath, index=False)
            return "Data appended to existing file!"
        else:
            # Schema changed (different loss function): start over.
            if file_type == "xlsx":
                data.to_excel(filepath, index=False)
            elif file_type == "csv":
                data.to_csv(filepath, index=False)
            return "Data saved to a new file (overwritten)!"

    except FileNotFoundError:
        # No existing file yet: create a fresh one.
        if file_type == "xlsx":
            data.to_excel(filepath, index=False)
        elif file_type == "csv":
            data.to_csv(filepath, index=False)
        return "Data saved to a new file!"
|
| 1095 |
+
|
| 1096 |
+
def parse_data_func(link_temp,progress=gr.Progress()):
    """Parse the given website URL into RAG source data, with UI progress.

    Args:
        link_temp (str): URL to scrape; handled by ``utils.parse_data``.
        progress (gr.Progress): Progress tracker; when used as a Gradio event
            handler, Gradio injects the instance for this default parameter.
    """
    progress(0, desc="Starting...")
    # Delegates the actual fetching/parsing to utils.parse_data.
    parse_data(link_temp,progress)
    gr.Info("Finished parsing!! Save as a docx file.")
|
| 1100 |
+
|
| 1101 |
+
def next_ques(ques, ans):
    """Fetch another random question for the labelling UI.

    Both arguments are ignored; they exist only because the Gradio click
    handler passes the current components' values.
    """
    new_question, _unused_answer = random_ques_ans2()
    return gr.Label(value=new_question)
|
| 1104 |
+
|
| 1105 |
+
with gr.Blocks(title="LLM QA Chatbot Builder",theme=gr.themes.Soft()) as demo:
|
| 1106 |
+
gr.Markdown("""
|
| 1107 |
+
# LLM QA Chatbot Builder
|
| 1108 |
+
""")
|
| 1109 |
+
with gr.Tab("Data Collection"):
|
| 1110 |
+
gr.Markdown(""" # Instructions:
|
| 1111 |
+
In this page you can prepare data for LLM fine-tuning, testing and embedding model finetuning your model. The data can be provided through Excel file or CSV file or directly via web interface. Additionally, data can be parsed from the target website (Data parsing for RAG) to further enhance the model performance.
|
| 1112 |
+
|
| 1113 |
+
## 1. If you want to provide data in Excel file or CSV file for model fine-tuning and testing.
|
| 1114 |
+
- Create an Excel or CSV file in the data folder and name it `finetune_data.xlsx` or `finetune_data.csv` for finetuning the model.
|
| 1115 |
+
- Create an Excel or CSV file in the data folder and name it `testing_data.xlsx` or `testing_data.csv` for generating answers using the fine-tuned model.
|
| 1116 |
+
- `finetune_data.xlsx` or `finetune_data.csv` has two columns: `question` and `answer`. `testing_data.xlsx` or `testing_data.csv` has three columns: `question`, `ground_truth` ,`context`.
|
| 1117 |
+
""")
|
| 1118 |
+
gr.Markdown("""
|
| 1119 |
+
## `finetune_data.xlsx` | `finetune_data.csv`
|
| 1120 |
+
""")
|
| 1121 |
+
gr.HTML(value=display_table(), label="finetune_data.xlsx or finetune_data.csv")
|
| 1122 |
+
gr.Markdown("""
|
| 1123 |
+
## `testing_data.xlsx` | `testing_data.csv`
|
| 1124 |
+
""")
|
| 1125 |
+
gr.HTML(value=display_table("data/demo_test_data.xlsx"), label="testing_data.xlsx or testing_data.csv")
|
| 1126 |
+
gr.Markdown("""
|
| 1127 |
+
## 2. You can use the below interface to create the dataset for training and testing models.
|
| 1128 |
+
""")
|
| 1129 |
+
|
| 1130 |
+
#Training data generation
|
| 1131 |
+
with gr.Tab("Training Data Generation"):
|
| 1132 |
+
with gr.Tab("Existing Questions"):
|
| 1133 |
+
gr.Markdown("""
|
| 1134 |
+
Existing questions are provided by the administrator and placed in the data folder named `existing_dataset.xlsx`. This file has only one column: `question`.
|
| 1135 |
+
After clicking the `Save the Answer` button. Those questions and answers are saved in the `data` folder as a `finetune_data.xlsx` file.
|
| 1136 |
+
""")
|
| 1137 |
+
ques_temp,ans_temp=random_ques_ans2()
|
| 1138 |
+
with gr.Row():
|
| 1139 |
+
ques=gr.Label(value=ques_temp,label="Question")
|
| 1140 |
+
with gr.Row():
|
| 1141 |
+
ans=gr.TextArea(label="Answer")
|
| 1142 |
+
with gr.Row():
|
| 1143 |
+
with gr.Row():
|
| 1144 |
+
type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
|
| 1145 |
+
save_training = gr.Button(value="Save")
|
| 1146 |
+
question = gr.Button("Generate New Question")
|
| 1147 |
+
with gr.Row():
|
| 1148 |
+
lab=gr.Label(visible=False)
|
| 1149 |
+
question.click(next_ques,None,ques)
|
| 1150 |
+
save_training.click(save_the_ques,[ques,ans,type_options],lab)
|
| 1151 |
+
|
| 1152 |
+
with gr.Tab("Custom Questions"):
|
| 1153 |
+
gr.Markdown("""
|
| 1154 |
+
After clicking the `save the answer` button. Those questions and answers are saved in the `data` folder as a `finetune_data.xlsx` file.
|
| 1155 |
+
""")
|
| 1156 |
+
with gr.Row():
|
| 1157 |
+
ques=gr.Textbox(label="Question")
|
| 1158 |
+
with gr.Row():
|
| 1159 |
+
ans=gr.TextArea(label="Answer")
|
| 1160 |
+
with gr.Row():
|
| 1161 |
+
with gr.Row():
|
| 1162 |
+
type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
|
| 1163 |
+
save_training = gr.Button(value="Save")
|
| 1164 |
+
with gr.Row():
|
| 1165 |
+
lab=gr.Label(visible=False,value="You answer is submitted!!! Thank you for your contribution.",label="Submitted")
|
| 1166 |
+
save_training.click(save_the_ques,[ques,ans,type_options],lab)
|
| 1167 |
+
|
| 1168 |
+
### Testing data generation
|
| 1169 |
+
with gr.Tab("Testing Data Generation"):
|
| 1170 |
+
gr.Markdown("""
|
| 1171 |
+
You can create test data for generating answers using the fine-tune model, which will be used for testing the model's performance.
|
| 1172 |
+
After clicking the `Save the Answer` button. Those questions and answers are saved in the `data` folder as a `testing_data.xlsx` file.
|
| 1173 |
+
""")
|
| 1174 |
+
with gr.Row():
|
| 1175 |
+
ques=gr.Textbox(label="Question")
|
| 1176 |
+
with gr.Row():
|
| 1177 |
+
ans=gr.TextArea(label="Ground Truth")
|
| 1178 |
+
with gr.Row():
|
| 1179 |
+
ans=gr.TextArea(label="Contexts")
|
| 1180 |
+
with gr.Row():
|
| 1181 |
+
with gr.Row():
|
| 1182 |
+
type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
|
| 1183 |
+
save_test = gr.Button(value="Save")
|
| 1184 |
+
with gr.Row():
|
| 1185 |
+
lab=gr.Label(visible=False,value="You answer is submitted!!! Thank you for your contribution.",label="Submitted")
|
| 1186 |
+
save_test.click(save_the_ques_test,[ques,ans,type_options],None)
|
| 1187 |
+
|
| 1188 |
+
## Embedding data generation
|
| 1189 |
+
def update_fields(loss_function):
    """Reconfigure the three embedding-data inputs for the chosen loss function.

    Args:
        loss_function (str): The loss function selected in the dropdown.

    Returns:
        tuple: (first_input, second_input, third_input, markdown) component
        updates. Labels/placeholders match the column names the selected loss
        expects; the markdown explains the expected data format. An
        unrecognised selection hides all four components.
    """
    if loss_function == "MultipleNegativesRankingLoss":
        # anchor / positive / negative triplet layout
        first_input = gr.Textbox(label="Anchor", visible=True, placeholder="The sentence to be embedded.")
        second_input = gr.Textbox(label="Positive", visible=True, placeholder="A sentence semantically similar to the anchor.")
        third_input = gr.Textbox(label="Negative", visible=True, placeholder="A sentence semantically dissimilar to the anchor.")
        markdown = gr.Markdown(
            """
            **MultipleNegativesRankingLoss:**
            Expects data with columns: `anchor`, `positive`, `negative`.
            - `anchor`: The sentence to be embedded.
            - `positive`: A sentence semantically similar to the anchor.
            - `negative`: A sentence semantically dissimilar to the anchor.""",
            visible=True
        )
    elif loss_function == "OnlineContrastiveLoss":
        # sentence pair with a binary similarity label
        first_input = gr.Textbox(label="Sentence 1", visible=True, placeholder="The first sentence.")
        second_input = gr.Textbox(label="Sentence 2", visible=True, placeholder="The second sentence.")
        third_input = gr.Textbox(label="Label", visible=True, placeholder="1 if the sentences are similar, 0 if dissimilar.")
        markdown = gr.Markdown(
            """
            **OnlineContrastiveLoss:**
            Expects data with columns: `sentence1`, `sentence2`, `label`.
            - `sentence1`, `sentence2`: Pairs of sentences.
            - `label`: 1 if the sentences are similar, 0 if dissimilar.""",
            visible=True
        )
    elif loss_function == "CoSENTLoss":
        # sentence pair with a continuous similarity score
        first_input = gr.Textbox(label="Sentence 1", visible=True, placeholder="The first sentence.")
        second_input = gr.Textbox(label="Sentence 2", visible=True, placeholder="The second sentence.")
        third_input = gr.Textbox(label="Score", visible=True, placeholder="A float value (e.g., 0-1) representing their similarity.")
        markdown = gr.Markdown(
            """
            **CoSENTLoss:**
            Expects data with columns: `sentence1`, `sentence2`, `score`.
            - `sentence1`, `sentence2`: Pairs of sentences.
            - `score`: A float value (e.g., 0-1) representing their similarity.""",
            visible=True
        )
    elif loss_function == "GISTEmbedLoss":
        # triplet layout; the negative may be omitted for similar-pair data
        first_input = gr.Textbox(label="Anchor", visible=True, placeholder="The sentence to be embedded.")
        second_input = gr.Textbox(label="Positive", visible=True, placeholder="A sentence semantically similar to the anchor.")
        third_input = gr.Textbox(label="Negative", visible=True, placeholder="A sentence semantically dissimilar to the anchor. Can be empty.")
        markdown = gr.Markdown(
            """
            **GISTEmbedLoss:**
            Expects data with either:
            - Columns: `anchor`, `positive`, `negative` (like TripletLoss).
            - Columns: `anchor`, `positive` (for pairs of similar sentences).""",
            visible=True
        )
    elif loss_function == "TripletLoss":
        # anchor / positive / negative triplet layout
        first_input = gr.Textbox(label="Anchor", visible=True, placeholder="The sentence to be embedded.")
        second_input = gr.Textbox(label="Positive", visible=True, placeholder="A sentence semantically similar to the anchor.")
        third_input = gr.Textbox(label="Negative", visible=True, placeholder="A sentence semantically dissimilar to the anchor.")
        markdown = gr.Markdown(
            """
            **TripletLoss:**
            Expects data with columns: `anchor`, `positive`, `negative`.
            - `anchor`: The sentence to be embedded.
            - `positive`: A sentence semantically similar to the anchor.
            - `negative`: A sentence semantically dissimilar to the anchor.""",
            visible=True
        )
    else:
        # Unknown selection: hide everything.
        first_input = gr.Textbox(visible=False)
        second_input = gr.Textbox(visible=False)
        third_input = gr.Textbox(visible=False)
        markdown = gr.Markdown(visible=False)

    return first_input, second_input, third_input, markdown
|
| 1259 |
+
|
| 1260 |
+
with gr.Tab("Embedding Data Generation"):
|
| 1261 |
+
gr.Markdown("**Choose a loss function to format your embedding data.**")
|
| 1262 |
+
with gr.Row():
|
| 1263 |
+
loss_function = gr.Dropdown(
|
| 1264 |
+
choices=[
|
| 1265 |
+
"MultipleNegativesRankingLoss",
|
| 1266 |
+
"OnlineContrastiveLoss",
|
| 1267 |
+
"CoSENTLoss",
|
| 1268 |
+
"GISTEmbedLoss",
|
| 1269 |
+
"TripletLoss",
|
| 1270 |
+
],
|
| 1271 |
+
label="Select the loss function",
|
| 1272 |
+
)
|
| 1273 |
+
with gr.Row():
|
| 1274 |
+
gr.Markdown("""Format `data/emb_data.xlsx` or `data/emb_data.csv` to the expected data format, according to the selected loss function.
|
| 1275 |
+
If the file exists and has matching columns, new data will be appended.
|
| 1276 |
+
Otherwise, the file will be overwritten.""")
|
| 1277 |
+
with gr.Row():
|
| 1278 |
+
loss_info_markdown = gr.Markdown(visible=False)
|
| 1279 |
+
with gr.Row():
|
| 1280 |
+
first_input = gr.Textbox(label="Anchor", value="",visible=False)
|
| 1281 |
+
second_input = gr.Textbox(label="Positive", value="",visible=False)
|
| 1282 |
+
third_input = gr.Textbox(label="Negative", value="",visible=False)
|
| 1283 |
+
loss_function.change(update_fields, loss_function, [first_input, second_input, third_input,loss_info_markdown])
|
| 1284 |
+
with gr.Row():
|
| 1285 |
+
with gr.Row():
|
| 1286 |
+
type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
|
| 1287 |
+
save_emb = gr.Button(value="Save")
|
| 1288 |
+
save_emb.click(save_emb_data,[loss_function,first_input,second_input,third_input,type_options])
|
| 1289 |
+
|
| 1290 |
+
with gr.Row():
|
| 1291 |
+
gr.Markdown("""
|
| 1292 |
+
## 3. Data parsing for RAG
|
| 1293 |
+
""")
|
| 1294 |
+
with gr.Row():
|
| 1295 |
+
link_temp=gr.Textbox(label="Enter Link to Parse Data for RAG",info="To provide the link for parsing the data from the website, this link can help create RAG data for the model.")
|
| 1296 |
+
parse_data_btn=gr.Button("Parse Data")
|
| 1297 |
+
from utils import parse_data
|
| 1298 |
+
parse_data_btn.click(parse_data_func,link_temp,link_temp)
|
| 1299 |
+
|
| 1300 |
+
#***************************************************
|
| 1301 |
+
with gr.Tab("Fine-tuning"):
|
| 1302 |
+
with gr.Tab("Fine-tune LLM"):
|
| 1303 |
+
with gr.Row():
|
| 1304 |
+
def login_hug(token):
    """Authenticate this process with the Hugging Face Hub.

    Args:
        token (str): A Hugging Face access token (the caller passes the
            HF_TOKEN environment variable).
    """
    # Local import keeps huggingface_hub out of module import time.
    from huggingface_hub import login
    login(token=token)
|
| 1307 |
+
login_hug(os.getenv('HF_TOKEN'))
|
| 1308 |
+
gr.Markdown("""
|
| 1309 |
+
# Instructions:
|
| 1310 |
+
- Required VRAM for training: 24GB, for inference: 16GB.(Mistral, Zepyhr and Lllama)\n
|
| 1311 |
+
- Required VRAM for training: 5GB, for inference: 4GB.(Phi,Flan-T5)
|
| 1312 |
+
- For fine-tuning a custom model select `custom model` option in `Select the model for fine-tuning` dropdown section. The custom model can be configured by editing the code section.\n
|
| 1313 |
+
- After fine-tuning the model, it will be saved in the `models` folder.
|
| 1314 |
+
""")
|
| 1315 |
+
|
| 1316 |
+
def edit_model_parameter(model_name_temp,edit_code,code_temp,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout, progress=gr.Progress()):
    """Fine-tune the selected LLM, optionally persisting edited training code first.

    Args:
        model_name_temp (str): Selected model ("Mistral", "Zephyr", "Llama",
            "Phi", "Flant5" or "Custom model").
        edit_code: Value of the "Advance Code Editing" button component; used
            only as a truthiness flag here.
        code_temp (str): Editor contents; when non-empty, written back to the
            model's fine-tuning script before training.
        lr, epoch, batch_size, gradient_accumulation, quantization,
        lora_r, lora_alpha, lora_dropout: Hyperparameters forwarded to the
            model-specific fine-tune method.
        progress (gr.Progress): Gradio progress tracker (injected by Gradio).
    """
    progress(0, desc="Fine-tune started!! please wait ...")
    # Map each model choice to its fine-tuning script so edited code is saved
    # to the right file. "Flant5" was missing from the original dispatch, so
    # Flant5 code edits were silently dropped (code_show/change_code_fun both
    # handle it).
    script_by_model = {
        "Mistral": r"fine_tune_file/mistral_finetune.py",
        "Zephyr": r"fine_tune_file/zepyhr_finetune.py",
        "Llama": r"fine_tune_file/llama_finetune.py",
        "Phi": r"fine_tune_file/phi_finetune.py",
        "Flant5": r"fine_tune_file/flant5_finetune.py",
        "Custom model": r"fine_tune_file/finetune_file.py",
    }
    # Write edited code to file before importing, so the latest code is used.
    if edit_code and len(code_temp) != 0 and model_name_temp in script_by_model:
        # 'with' closes the handle and flushes the write (original leaked it).
        with open(script_by_model[model_name_temp], "w") as fh:
            fh.write(code_temp)

    # Import just before fine-tuning, to ensure the freshly saved code is used.
    from fine_tune_file.finetune_file import custom_model_trainer
    from fine_tune_file.modular_finetune import get_trainer

    if model_name_temp == "Custom model":
        gr.Info("Fine-tune started!!!")
        trainer = custom_model_trainer()
        trainer.custom_model_finetune()
        gr.Info("Fine-tune Ended!!!")
    else:
        trainer = get_trainer(model_name_temp)
        gr.Info("Fine-tune started!!!")
        # Each trainer exposes a differently named fine-tune entry point.
        finetune_method = {
            "Mistral": "mistral_finetune",
            "Zephyr": "zepyhr_finetune",
            "Llama": "llama_finetune",
            "Phi": "phi_finetune",
            "Flant5": "flant5_finetune",
        }.get(model_name_temp)
        if finetune_method is not None:
            getattr(trainer, finetune_method)(lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout)
        gr.Info("Fine-tune Ended!!!")
|
| 1359 |
+
|
| 1360 |
+
def code_show(model_name):
    """Return an editable code viewer loaded with the selected model's fine-tuning script.

    Args:
        model_name (str): One of the fine-tunable model choices.

    Returns:
        gr.Code | None: A visible, editable Python code component, or None
        for an unrecognised model name (same as the original fall-through).
    """
    script_by_model = {
        "Mistral": r"fine_tune_file/mistral_finetune.py",
        "Zephyr": r"fine_tune_file/zepyhr_finetune.py",
        "Llama": r"fine_tune_file/llama_finetune.py",
        "Phi": r"fine_tune_file/phi_finetune.py",
        "Flant5": r"fine_tune_file/flant5_finetune.py",
    }
    path = script_by_model.get(model_name)
    if path is None:
        return None
    # 'with' closes the file handle promptly (the original leaked it).
    with open(path) as fh:
        source = fh.read()
    return gr.Code(visible=True, value=source, interactive=True, language="python")
|
| 1376 |
+
|
| 1377 |
+
def custom_model(model_name):  # It shows custom model code in the UI.
    """Toggle the UI between custom-model code editing and the standard flow.

    For "Custom model", show the editable finetune_file.py source and hide the
    "Advance Code Editing" button; for any other model, hide the code box and
    restore the button.

    Returns:
        list: [code component update, button component update].
    """
    if model_name == "Custom model":
        # 'with' ensures the file handle is closed (the original leaked it).
        with open(r"fine_tune_file/finetune_file.py") as fh:
            source = fh.read()
        return [gr.Code(visible=True, value=source, interactive=True, language="python"),
                gr.Button(visible=False)]
    return [gr.Code(visible=False), gr.Button("Advance Code Editing", visible=True)]
|
| 1383 |
+
def change_code_fun(code_,model_name):
    """Persist edited fine-tuning code back to the selected model's script file.

    Args:
        code_ (str): The edited source code from the UI editor.
        model_name (str): The model whose training script should be replaced.
    """
    script_by_model = {
        "Mistral": r"fine_tune_file/mistral_finetune.py",
        "Zephyr": r"fine_tune_file/zepyhr_finetune.py",
        "Llama": r"fine_tune_file/llama_finetune.py",
        "Phi": r"fine_tune_file/phi_finetune.py",
        "Flant5": r"fine_tune_file/flant5_finetune.py",
    }
    path = script_by_model.get(model_name)
    if path is not None:
        # 'with' closes the handle and flushes the write (original leaked it).
        with open(path, "w") as fh:
            fh.write(code_)
        gr.Info("Successfully saved code!!!")
|
| 1399 |
+
|
| 1400 |
+
def finetune_emb(model_name, loss_name, epochs = 1, batch_size = 8):
    """Fine-tune the selected sentence-embedding model.

    Args:
        model_name (str): Hugging Face id of the embedding model.
        loss_name (str): Name of the loss function to train with.
        epochs (int, optional): Number of training epochs. Defaults to 1.
        batch_size (int, optional): Training batch size. Defaults to 8.
    """
    gr.Info("Embedding model fine-tune is started!!!")
    # Local import so the (heavy) training stack loads only when needed.
    from embedding_tuner import EmbeddingFinetuner
    finetuner = EmbeddingFinetuner(
        model_name=model_name,
        loss_function=loss_name,
        epochs=epochs,
        batch_size=batch_size,
    )
    success = finetuner.train()
    # Only announce completion when train() reports success; failures are
    # silent here (presumably surfaced by the trainer itself — TODO confirm).
    if success:
        gr.Info("Embedding model fine-tune finished!!!")
|
| 1412 |
+
|
| 1413 |
+
with gr.Row():
|
| 1414 |
+
code_temp=gr.Code(visible=False)
|
| 1415 |
+
with gr.Row():
|
| 1416 |
+
model_name=gr.Dropdown(choices=["Mistral","Zephyr","Llama","Phi","Flant5","Custom model"],label="Select the LLM for fine-tuning")
|
| 1417 |
+
with gr.Accordion("Parameter Setup"):
|
| 1418 |
+
with gr.Row():
|
| 1419 |
+
lr=gr.Number(label="learning_rate",value=5e-6,interactive=True,info="The step size at which the model parameters are updated during training. It controls the magnitude of the updates to the model's weights.")
|
| 1420 |
+
epoch=gr.Number(label="epochs",value=2,interactive=True,info="One complete pass through the entire training dataset during the training process. It's a measure of how many times the algorithm has seen the entire dataset.")
|
| 1421 |
+
batch_size=gr.Number(label="batch_size",value=4,interactive=True,info="The number of training examples used in one iteration of training. It affects the speed and stability of the training process.")
|
| 1422 |
+
gradient_accumulation = gr.Number(info="Gradient accumulation involves updating model weights after accumulating gradients over multiple batches, instead of after each individual batch.",label="gradient_accumulation",value=4,interactive=True)
|
| 1423 |
+
with gr.Row():
|
| 1424 |
+
quantization = gr.Dropdown(info="Quantization is a technique used to reduce the precision of numerical values, typically from 32-bit floating-point numbers to lower bit representations.",label="quantization",choices=[4,8],value=8,interactive=True)
|
| 1425 |
+
lora_r = gr.Number(info="LoRA_r is a hyperparameter associated with the rank of the low-rank approximation used in LoRA.",label="lora_r",value=16,interactive=True)
|
| 1426 |
+
lora_alpha = gr.Number(info="LoRA_alpha is a hyperparameter used in LoRA for controlling the strength of the adaptation.",label="lora_alpha",value=32,interactive=True)
|
| 1427 |
+
lora_dropout = gr.Number(info="LoRA_dropout is a hyperparameter used in LoRA to control the dropout rate during fine-tuning.",label="lora_dropout",value=.05,interactive=True)
|
| 1428 |
+
with gr.Row():
|
| 1429 |
+
edit_code=gr.Button("Advance Code Editing")
|
| 1430 |
+
with gr.Row():
|
| 1431 |
+
code_temp=gr.Code(visible=False)
|
| 1432 |
+
with gr.Row():
|
| 1433 |
+
parameter_alter=gr.Button("Fine-tune")
|
| 1434 |
+
with gr.Row():
|
| 1435 |
+
fin_com=gr.Label(visible=False)
|
| 1436 |
+
edit_code.click(code_show,model_name,code_temp)
|
| 1437 |
+
# On click finetune button
|
| 1438 |
+
parameter_alter.click(edit_model_parameter,[model_name,edit_code,code_temp,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout],model_name)
|
| 1439 |
+
model_name.change(custom_model,model_name,[code_temp,edit_code])
|
| 1440 |
+
with gr.Tab("Embedding model"):
|
| 1441 |
+
with gr.Row():
|
| 1442 |
+
embedding_model = gr.Dropdown(
|
| 1443 |
+
choices=[
|
| 1444 |
+
"BAAI/bge-base-en-v1.5",
|
| 1445 |
+
"dunzhang/stella_en_1.5B_v5",
|
| 1446 |
+
"dunzhang/stella_en_400M_v5",
|
| 1447 |
+
"nvidia/NV-Embed-v2",
|
| 1448 |
+
"Alibaba-NLP/gte-Qwen2-1.5B-instruct",
|
| 1449 |
+
],
|
| 1450 |
+
label="Select the embedding model for fine-tuning",
|
| 1451 |
+
)
|
| 1452 |
+
loss_function = gr.Dropdown(
|
| 1453 |
+
choices=[
|
| 1454 |
+
"MultipleNegativesRankingLoss",
|
| 1455 |
+
"OnlineContrastiveLoss",
|
| 1456 |
+
"CoSENTLoss",
|
| 1457 |
+
"GISTEmbedLoss",
|
| 1458 |
+
"TripletLoss",
|
| 1459 |
+
],
|
| 1460 |
+
label="Select the loss function",
|
| 1461 |
+
)
|
| 1462 |
+
|
| 1463 |
+
epoch=gr.Number(label="epochs",value=1,interactive=True,info="One complete pass through the entire training dataset during the training process.")
|
| 1464 |
+
batch_size=gr.Number(label="batch_size",value=8,interactive=True,info="The number of training examples used in one iteration of training.")
|
| 1465 |
+
with gr.Row():
|
| 1466 |
+
btn_emb = gr.Button("Fine-tune the embedding model")
|
| 1467 |
+
|
| 1468 |
+
|
| 1469 |
+
# with gr.Row():
|
| 1470 |
+
# with gr.Accordion(label="Expected data format according to loss function"):
|
| 1471 |
+
# loss_info = gr.Markdown(
|
| 1472 |
+
# """
|
| 1473 |
+
# # Expected data format according to loss function:
|
| 1474 |
+
# ### Format `data/emb_data.xlsx` | `data/emb_data.xlsx` accordingly.
|
| 1475 |
+
|
| 1476 |
+
# **MultipleNegativesRankingLoss:**
|
| 1477 |
+
# Expects data with columns: `anchor`, `positive`, `negative`.
|
| 1478 |
+
# - `anchor`: The sentence to be embedded.
|
| 1479 |
+
# - `positive`: A sentence semantically similar to the anchor.
|
| 1480 |
+
# - `negative`: A sentence semantically dissimilar to the anchor.
|
| 1481 |
+
|
| 1482 |
+
# **OnlineContrastiveLoss:**
|
| 1483 |
+
# Expects data with columns: `sentence1`, `sentence2`, `label`.
|
| 1484 |
+
# - `sentence1`, `sentence2`: Pairs of sentences.
|
| 1485 |
+
# - `label`: 1 if the sentences are similar, 0 if dissimilar.
|
| 1486 |
+
|
| 1487 |
+
# **CoSENTLoss:**
|
| 1488 |
+
# Expects data with columns: `sentence1`, `sentence2`, `score`.
|
| 1489 |
+
# - `sentence1`, `sentence2`: Pairs of sentences.
|
| 1490 |
+
# - `score`: A float value (e.g., 0-1) representing their similarity.
|
| 1491 |
+
|
| 1492 |
+
# **GISTEmbedLoss:**
|
| 1493 |
+
# Expects data with either:
|
| 1494 |
+
# - Columns: `anchor`, `positive`, `negative` (like TripletLoss).
|
| 1495 |
+
# - Columns: `anchor`, `positive` (for pairs of similar sentences).
|
| 1496 |
+
|
| 1497 |
+
# **TripletLoss:**
|
| 1498 |
+
# Expects data with columns: `anchor`, `positive`, `negative`.
|
| 1499 |
+
# - `anchor`: The sentence to be embedded.
|
| 1500 |
+
# - `positive`: A sentence semantically similar to the anchor.
|
| 1501 |
+
# - `negative`: A sentence semantically dissimilar to the anchor.
|
| 1502 |
+
# """
|
| 1503 |
+
# )
|
| 1504 |
+
|
| 1505 |
+
|
| 1506 |
+
|
| 1507 |
+
btn_emb.click(finetune_emb,[embedding_model, loss_function, epoch, batch_size], None)
|
| 1508 |
+
#***************************************************
|
| 1509 |
+
with gr.Tab("Testing Data Generation and RAG Customization"):
|
| 1510 |
+
from utils import save_params_to_file
|
| 1511 |
+
def ans_gen_fun(model_name,embedding_name,
                splitter_type_dropdown,chunk_size_slider,
                chunk_overlap_slider,separator_textbox,max_tokens_slider,save_as_fav,progress=gr.Progress()):
    """Generate answers for the testing dataset with the selected model and RAG
    settings, score them, save the report, and return a results bar plot.

    Args:
        model_name (str): Fine-tuned model folder name (from `models/`).
        embedding_name (str): Embedding model id for the RAG retriever.
        splitter_type_dropdown (str): Text-splitter type for chunking.
        chunk_size_slider (int): Chunk size for the splitter.
        chunk_overlap_slider (int): Chunk overlap for the splitter.
        separator_textbox (str): Separator used by the splitter.
        max_tokens_slider (int): Generation token budget.
        save_as_fav (bool): Persist these settings via save_params_to_file.
        progress (gr.Progress): Gradio progress tracker (injected by Gradio).

    Returns:
        gr.BarPlot | None: Visible plot of average RAG metric scores, or None
        when the testing dataset does not exist yet.
    """
    if not os.path.exists(os.path.join("data","testing_dataset.xlsx")):
        gr.Warning("You need to create testing dataset first from Data collection.")
        return
    if save_as_fav:
        save_params_to_file(model_name,embedding_name,
                            splitter_type_dropdown,chunk_size_slider,
                            chunk_overlap_slider,separator_textbox,max_tokens_slider)
    if not os.path.exists(model_name):
        # NOTE(review): gr.Error is an exception type and has no effect unless
        # raised; also this checks the bare model_name rather than
        # os.path.join("models", model_name) — confirm intent before making
        # this check fatal.
        gr.Error("Model not found in local folder!!")
    from model_ret import calculate_rag_metrics
    progress(0, desc="Starting...")
    model_ques_ans_gen=[]
    df_temp=pd.read_excel(r"data/testing_dataset.xlsx")
    infer_model = model_chain(model_name,None,
                              True,embedding_name,splitter_type_dropdown,chunk_size_slider,
                              chunk_overlap_slider,separator_textbox,max_tokens_slider)
    rag_chain=infer_model.rag_chain_ret()
    # Rows are assumed (question, ground_truth, context) — matches the testing
    # dataset layout described in the Data Collection tab.
    for idx, x in enumerate(progress.tqdm(df_temp.values), start=1):
        model_ques_ans_gen.append({
            "id":idx,
            "question":x[0]
            ,'answer':rag_chain.ans_ret(x,rag_chain)
            , "contexts":x[2]
            , "ground_truths":x[1]
        })
    temp=calculate_rag_metrics(model_ques_ans_gen,model_name)
    temp['Average Rating'] = temp.mean(axis=1)
    pd.DataFrame(temp).to_excel(os.path.join("model_ans",f"_{model_name+cur_time}.xlsx"),index=False)
    rag_metrics = ['answer_correctness', 'answer_similarity', 'answer_relevancy', 'faithfulness', 'context_recall', 'context_precision']
    new_df = pd.DataFrame({'Rag Metric': rag_metrics, 'Average Rating': temp.mean()})

    # Announce completion BEFORE returning; in the original this Info call sat
    # after the return statement and was unreachable dead code.
    gr.Info("Generating answer from model is finished!!! Now, it is ready for human evaluation. Model answer is saved in \"model_ans\" folder. ")
    return gr.BarPlot(
        new_df,
        x="Rag Metric",
        y="Average Rating",
        x_title="Rag Metric",
        y_title="Average Rating",
        title="RAG performance",
        tooltip=["Rag Metric", "Average Rating"],
        y_lim=[1, 200],
        width=150,
        # height=1000,
        visible=True
    )
|
| 1562 |
+
|
| 1563 |
+
gr.Markdown(""" # Instructions:\n
|
| 1564 |
+
In this page you can generate answer from fine-tuned models for human evaluation. The questions must be created using `Testing data generation` section of `Data collection` tab.
|
| 1565 |
+
""")
|
| 1566 |
+
with gr.Row():
|
| 1567 |
+
embedding_name=gr.Dropdown(choices=["BAAI/bge-base-en-v1.5","dunzhang/stella_en_1.5B_v5","dunzhang/stella_en_400M_v5",
|
| 1568 |
+
"nvidia/NV-Embed-v2","Alibaba-NLP/gte-Qwen2-1.5B-instruct"],
|
| 1569 |
+
label="Select the Embedding Model")
|
| 1570 |
+
splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
|
| 1571 |
+
value="character", label="Splitter Type",interactive=True)
|
| 1572 |
+
|
| 1573 |
+
chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=500, step=50, label="Chunk Size")
|
| 1574 |
+
chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=30, step=10, label="Chunk Overlap",interactive=True)
|
| 1575 |
+
separator_textbox = gr.Textbox(value="\n", label="Separator (e.g., newline '\\n')",interactive=True)
|
| 1576 |
+
max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=1000, step=100, label="Max Tokens",interactive=True)
|
| 1577 |
+
with gr.Row():
|
| 1578 |
+
save_as_fav=gr.Checkbox(label="Save this settings as favorite")
|
| 1579 |
+
model_name=gr.Dropdown(choices=os.listdir("models"),label="Select the Model")
|
| 1580 |
+
with gr.Row():
|
| 1581 |
+
ans_gen=gr.Button("Generate Answer for Testing Dataset")
|
| 1582 |
+
with gr.Row():
|
| 1583 |
+
plot = gr.BarPlot(visible=False)
|
| 1584 |
+
ans_gen.click(ans_gen_fun,[model_name,embedding_name,
|
| 1585 |
+
splitter_type_dropdown,chunk_size_slider,
|
| 1586 |
+
chunk_overlap_slider,separator_textbox,max_tokens_slider,save_as_fav],plot)
|
| 1587 |
+
#***************************************************Human evaluation
|
| 1588 |
+
import secrets
|
| 1589 |
+
|
| 1590 |
+
def generate_token():
    """Create a fresh evaluator token and seed an empty Q/A workbook for it.

    Keeps drawing 12-hex-char tokens until one does not collide with an
    existing workbook in ``save_ques_ans`` (filenames minus the ``.xlsx``
    suffix), writes an empty spreadsheet with the expected columns, and
    returns a visible Gradio label showing the token.
    """
    while True:
        token = secrets.token_hex(6)
        # Existing tokens = xlsx filenames with the 5-char ".xlsx" suffix stripped.
        existing = [name[:-5] for name in os.listdir("save_ques_ans")]
        if token not in existing:
            df = pd.DataFrame({'id': [], 'question': [], 'answer': []})
            # os.path.join instead of manual "//" concatenation for portability.
            df.to_excel(os.path.join("save_ques_ans", token + ".xlsx"), index=False)
            return gr.Label(label="Please keep the token for tracking question answer data",
                            value=token, visible=True)
|
| 1603 |
+
|
| 1604 |
+
def bar_plot_fn():
    """Build and return a visible bar plot of average human ratings per model."""
    report = score_report_bar()
    return gr.BarPlot(
        report,
        x="Model Name",
        y="Average Rating",
        x_title="Model name",
        y_title="Average Rating",
        title="Model performance",
        tooltip=["Model Name", "Average Rating"],
        y_lim=[1, 200],
        width=150,
        # height=1000,
        visible=True,
    )
|
| 1619 |
+
with gr.Tab("Human Evaluation"):
|
| 1620 |
+
def answer_updated(model_ans):
    """Load the selected model-answer workbook and show its first question.

    Returns updated Gradio components: a header markdown, the first row's
    id/question/answer labels, and hides the model dropdown plus the submit
    button so evaluation can begin. (The leftover debug ``print`` of the first
    row id was removed.)
    """
    df_ques_ans = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    num = 0  # evaluation always starts from the first question
    return [
        gr.Markdown(value=f"""# Model_name: {model_ans}
# Number of questions: {len(df_ques_ans)}""", visible=True),
        gr.Label(value=str(df_ques_ans['id'][num])),
        gr.Label(value=str(df_ques_ans['question'][num])),
        gr.Label(value=str(df_ques_ans['answer'][num])),
        gr.Dropdown(visible=False),
        gr.Button(visible=False),
    ]
|
| 1632 |
+
|
| 1633 |
+
with gr.Row():
|
| 1634 |
+
new_user=gr.Button("New User")
|
| 1635 |
+
with gr.Row():
|
| 1636 |
+
new_user_token=gr.Label(visible=False)
|
| 1637 |
+
with gr.Row():
|
| 1638 |
+
token_key=gr.Textbox(label="Enter your Token")
|
| 1639 |
+
model_ans=gr.Dropdown(choices=os.listdir("model_ans"),label="Select the Model Answer for Human Evaluation")
|
| 1640 |
+
btn_1=gr.Button("Submit")
|
| 1641 |
+
gr.Markdown(""" # Instructions:
|
| 1642 |
+
In this section, humans evaluate the answers of the model given specific questions. Each answer is rated between 1 and 5 by anonymous students.
|
| 1643 |
+
Those values are saved in the `scrore_report` folder.
|
| 1644 |
+
""")
|
| 1645 |
+
lab_temp=gr.Markdown(visible=False)
|
| 1646 |
+
|
| 1647 |
+
with gr.Row():
|
| 1648 |
+
id=gr.Label(value="",label="ID")
|
| 1649 |
+
with gr.Row():
|
| 1650 |
+
ques=gr.Label(value="",label="Question")
|
| 1651 |
+
with gr.Row():
|
| 1652 |
+
ans=gr.Label(value="",label="Answer")
|
| 1653 |
+
with gr.Row():
|
| 1654 |
+
score = gr.Radio(choices=[1,2,3,4,5],label="Rating")
|
| 1655 |
+
with gr.Row():
|
| 1656 |
+
human_ans_btn=gr.Button("Show Answer From Other Evaluators")
|
| 1657 |
+
with gr.Row():
|
| 1658 |
+
human_ans_lab=gr.Label(label="Human Answer",visible=False)
|
| 1659 |
+
with gr.Row():
|
| 1660 |
+
btn = gr.Button("Save")
|
| 1661 |
+
question = gr.Button("Skip")
|
| 1662 |
+
# with gr.Row():
|
| 1663 |
+
# save_all_btn=gr.Button("Save all the data in dataframe")
|
| 1664 |
+
# with gr.Row():
|
| 1665 |
+
# move=gr.Number(label="Move to the question")
|
| 1666 |
+
# move_btn=gr.Button("move")
|
| 1667 |
+
with gr.Row():
|
| 1668 |
+
btn_plot=gr.Button("Plot Generation")
|
| 1669 |
+
with gr.Row():
|
| 1670 |
+
plot = gr.BarPlot(visible=False)
|
| 1671 |
+
btn_plot.click(bar_plot_fn, None, outputs=plot)
|
| 1672 |
+
btn_1.click(answer_updated,model_ans,[lab_temp,id,ques,ans,model_ans,btn_1])
|
| 1673 |
+
btn.click(score_save, inputs=[ques,ans,score,model_ans,token_key], outputs=[id,ques,ans])
|
| 1674 |
+
question.click(new_ques,model_ans,[id,ques,ans])
|
| 1675 |
+
# save_all_btn.click(save_all,model_ans,None)
|
| 1676 |
+
# move_btn.click(move_to,[move,model_ans],[id,ques,ans])
|
| 1677 |
+
def human_ans_func(id, ques):
    """Fetch all other evaluators' entries for this question and reveal them."""
    contributions = all_contri_ans(id, ques)
    joined = "\n".join(contributions)
    return [gr.Button("Show Answer from Other Evaluators"),
            gr.Label(value=joined, visible=True)]
|
| 1680 |
+
human_ans_btn.click(human_ans_func,[id, ques],[human_ans_btn,human_ans_lab])
|
| 1681 |
+
new_user.click(generate_token,None,new_user_token)
|
| 1682 |
+
|
| 1683 |
+
#***************************************************
|
| 1684 |
+
infer_ragchain=None
|
| 1685 |
+
with gr.Tab("Inference"):
|
| 1686 |
+
def echo(message, history, model_name_local, model_name_online,
         inf_checkbox, embedding_name, splitter_type_dropdown, chunk_size_slider,
         chunk_overlap_slider, separator_textbox, max_tokens_slider):
    """Chat callback: lazily build the RAG chain on first call, then answer.

    The chain is cached in the module-global ``infer_ragchain`` so the model
    is only loaded once per process.
    """
    global infer_ragchain
    if infer_ragchain is None:
        gr.Info("Please wait!!! model is loading!!")
        if inf_checkbox:
            gr.Info("Model is loading from Huggingface!!")
        infer_ragchain = model_chain(
            model_name_local, model_name_online, inf_checkbox, embedding_name,
            splitter_type_dropdown, chunk_size_slider,
            chunk_overlap_slider, separator_textbox, max_tokens_slider)
    rag_chain = infer_ragchain.rag_chain_ret()
    return infer_ragchain.ans_ret(message, rag_chain)
|
| 1699 |
+
from utils import load_params_from_file
|
| 1700 |
+
saved_params = load_params_from_file()
|
| 1701 |
+
# If saved parameters exist, use them; otherwise, set default values
|
| 1702 |
+
default_model_name = saved_params['model_name'] if saved_params else "Llama"
|
| 1703 |
+
default_embedding_name = saved_params['embedding_name'] if saved_params else "BAAI/bge-base-en-v1.5"
|
| 1704 |
+
default_splitter_type = saved_params['splitter_type_dropdown'] if saved_params else "character"
|
| 1705 |
+
default_chunk_size = saved_params['chunk_size_slider'] if saved_params else 500
|
| 1706 |
+
default_chunk_overlap = saved_params['chunk_overlap_slider'] if saved_params else 30
|
| 1707 |
+
default_separator = saved_params['separator_textbox'] if saved_params else "\n"
|
| 1708 |
+
default_max_tokens = saved_params['max_tokens_slider'] if saved_params else 1000
|
| 1709 |
+
with gr.Row():
|
| 1710 |
+
def login_hug(token):
    """Authenticate this process with the Hugging Face Hub using *token*."""
    from huggingface_hub import login
    login(token=token)
|
| 1713 |
+
login_hug(os.getenv('HF_TOKEN'))
|
| 1714 |
+
with gr.Row():
|
| 1715 |
+
embedding_name=gr.Dropdown(choices=["BAAI/bge-base-en-v1.5","dunzhang/stella_en_1.5B_v5","dunzhang/stella_en_400M_v5",
|
| 1716 |
+
"nvidia/NV-Embed-v2","Alibaba-NLP/gte-Qwen2-1.5B-instruct"],value=default_embedding_name,
|
| 1717 |
+
label="Select the Embedding Model")
|
| 1718 |
+
splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
|
| 1719 |
+
value=default_splitter_type, label="Splitter Type",interactive=True)
|
| 1720 |
+
|
| 1721 |
+
chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=default_chunk_size, step=50, label="Chunk Size")
|
| 1722 |
+
chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=default_chunk_overlap, step=10, label="Chunk Overlap",interactive=True)
|
| 1723 |
+
separator_textbox = gr.Textbox(value=default_separator, label="Separator (e.g., newline '\\n')",interactive=True)
|
| 1724 |
+
max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=default_max_tokens, step=100, label="Max Tokens",interactive=True)
|
| 1725 |
+
|
| 1726 |
+
inf_checkbox=gr.Checkbox(label="Do you want to use without fine-tuned model from Hugging face?")
|
| 1727 |
+
model_name_local=gr.Dropdown(choices=os.listdir("models"),visible=True,label="Select the fine-tuned LLM",value=default_model_name)
|
| 1728 |
+
model_name_online=gr.Dropdown(visible=False)
|
| 1729 |
+
def model_online_local_show(inf_checkbox):
    """Toggle between the local fine-tuned dropdown and the HF-hub dropdown."""
    if not inf_checkbox:
        # Checkbox off: show local fine-tuned checkpoints, hide the hub list.
        return [gr.Dropdown(choices=os.listdir("models"), label="Select the fine-tuned LLM", visible=True),
                gr.Dropdown(visible=False)]
    # Checkbox on: hide the local list, show the Hugging Face hub models.
    return [gr.Dropdown(visible=False),
            gr.Dropdown(choices=["Zephyr", "Llama", "Mistral", "Phi", "Flant5"],
                        label="Select the LLM from Huggingface", visible=True)]
|
| 1737 |
+
|
| 1738 |
+
inf_checkbox.change(model_online_local_show,[inf_checkbox],[model_name_local,model_name_online])
|
| 1739 |
+
gr.ChatInterface(fn=echo,
|
| 1740 |
+
additional_inputs=[model_name_local,model_name_online,inf_checkbox,embedding_name,
|
| 1741 |
+
splitter_type_dropdown,chunk_size_slider,
|
| 1742 |
+
chunk_overlap_slider,separator_textbox,max_tokens_slider],
|
| 1743 |
+
title="Chatbot")
|
| 1744 |
+
#----------------------------------------------
|
| 1745 |
+
with gr.Tab("Deployment"):
|
| 1746 |
+
gr.Markdown("""`deploy` folder has all the code for the deployment of the model.
|
| 1747 |
+
For installing dependencies use the following command: `pip install -r requirements.txt`.
|
| 1748 |
+
""")
|
| 1749 |
+
def deploy_func(model_name):
    """Copy the inference sources into ``deploy`` and persist the chosen model.

    BUG FIX(review): the original computed ``src_file_path``/``dest_file_path``
    but never performed the copy — ``shutil.copy`` is added so the deploy
    folder actually receives the files. The parameter save is loop-invariant
    and is hoisted out of the loop.
    """
    import shutil
    import os
    src_folder = 'src'
    deploy_folder = 'deploy'
    files_to_copy = ['model_ret.py', 'create_retriever.py', 'inference.py']
    for file_name in files_to_copy:
        src_file_path = os.path.join(src_folder, file_name)
        dest_file_path = os.path.join(deploy_folder, file_name)
        if os.path.exists(src_file_path):
            shutil.copy(src_file_path, dest_file_path)
    # Record the selected model so deploy/main.py picks it up from params.txt.
    param_list = load_params_from_file()
    param_list["model_name"] = model_name
    save_params_to_file(param_list)
|
| 1762 |
+
|
| 1763 |
+
|
| 1764 |
+
model_name=gr.Dropdown(choices=os.listdir("models"),label="Select the Model")
|
| 1765 |
+
btn_model=gr.Button("Deploy")
|
| 1766 |
+
btn_model.click(deploy_func,model_name)
|
| 1767 |
+
|
| 1768 |
+
demo.launch(share=False)
|
create_retriever.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import glob
|
| 3 |
+
from langchain_community.document_loaders import Docx2txtLoader, TextLoader, PyPDFLoader
|
| 4 |
+
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter
|
| 5 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 6 |
+
from langchain.vectorstores import Chroma
|
| 7 |
+
from langchain.retrievers import EnsembleRetriever
|
| 8 |
+
# from ragatouille import RAGPretrainedModel
|
| 9 |
+
|
| 10 |
+
# Function to load and process documents
|
| 11 |
+
def docs_return(flag):
    """Load every .docx/.pdf/.txt under ``rag_data/``.

    Returns the flat list of LangChain Document objects when *flag* == 0,
    otherwise one big string joining the first page/document of each file
    with blank lines. Empty loader results are skipped (the original raised
    IndexError on ``documents[0]`` for an empty file).
    """
    directory_path = 'rag_data/'

    all_doc, all_doc2 = [], []

    for path in glob.glob(directory_path + '*.docx'):
        documents = Docx2txtLoader(path).load()
        all_doc.extend(documents)
        if documents:  # guard against empty loads
            all_doc2.append(str(documents[0].page_content))

    for path in glob.glob(directory_path + '*.pdf'):
        # lazy_load yields page by page; materialize to keep behavior identical
        documents = list(PyPDFLoader(path, extract_images=True).lazy_load())
        all_doc.extend(documents)
        if documents:
            # NOTE: only the first page contributes to the joined-string form,
            # matching the original behavior.
            all_doc2.append(str(documents[0].page_content))

    for path in glob.glob(directory_path + '*.txt'):
        documents = TextLoader(path).load()
        all_doc.extend(documents)
        if documents:
            all_doc2.append(str(documents[0].page_content))

    docs = '\n\n'.join(all_doc2)

    return all_doc if flag == 0 else docs
|
| 47 |
+
|
| 48 |
+
# Function to get or download the embedding model
|
| 49 |
+
def get_embedding_model(model_name):
    """Return HuggingFace embeddings, preferring a cached local copy if present.

    Local copies live under ``embedding_model/`` with '/' replaced by '_'.
    """
    local_model_path = f"embedding_model/{model_name.replace('/', '_')}"
    if not os.path.exists(local_model_path):
        print(f"Downloading model {model_name}")
        return HuggingFaceEmbeddings(model_name=model_name)
    print(f"Loading local model from {local_model_path}")
    return HuggingFaceEmbeddings(model_name=local_model_path)
|
| 57 |
+
|
| 58 |
+
# Function to return different types of text splitters
|
| 59 |
+
def get_text_splitter(splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Build the requested LangChain text splitter.

    'character' honors *separator*, 'token' uses *max_tokens* as its chunk
    size, and any other value raises ValueError.
    """
    if splitter_type == 'character':
        return CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator)
    if splitter_type == 'recursive':
        return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    if splitter_type == 'token':
        return TokenTextSplitter(chunk_size=max_tokens, chunk_overlap=chunk_overlap)
    raise ValueError("Unsupported splitter type. Choose from 'character', 'recursive', or 'token'.")
|
| 68 |
+
|
| 69 |
+
# Retriever using Chroma and HuggingFace embeddings
|
| 70 |
+
def retriever_chroma(flag, model_name="BAAI/bge-large-en-v1.5", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Build (flag falsy) or reopen (flag truthy) a Chroma MMR retriever.

    When building, the rag_data documents are split with the configured
    splitter and persisted to ``./chroma_db``.
    """
    # Load or download the embedding model
    embeddings = get_embedding_model(model_name)

    if not flag:
        all_doc = docs_return(0)
        text_splitter = get_text_splitter(splitter_type=splitter_type, chunk_size=chunk_size,
                                          chunk_overlap=chunk_overlap, separator=separator,
                                          max_tokens=max_tokens)
        docs = text_splitter.split_documents(documents=all_doc)
        vectordb = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")
    else:
        # BUG FIX(review): Chroma has no ``load_local`` (that is the FAISS
        # API); reopen the persisted store through the constructor instead.
        vectordb = Chroma(persist_directory="vectorstore", embedding_function=embeddings)

    return vectordb.as_retriever(
        search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}
    )
|
data/demo_table_data.xlsx
ADDED
|
Binary file (11.1 kB). View file
|
|
|
data/demo_test_data.xlsx
ADDED
|
Binary file (12.4 kB). View file
|
|
|
data/emb_data.xlsx
ADDED
|
Binary file (4.97 kB). View file
|
|
|
data/existing_dataset.xlsx
ADDED
|
Binary file (10.5 kB). View file
|
|
|
data/finetune_data.xlsx
ADDED
|
Binary file (5 kB). View file
|
|
|
data/testing_dataset.xlsx
ADDED
|
Binary file (27.5 kB). View file
|
|
|
data/validation-kuetllm_tanim - Copy.xlsx
ADDED
|
Binary file (11.2 kB). View file
|
|
|
data_ret.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import WebBaseLoader
|
| 2 |
+
def data_ret2(link):
    """Fetch *link* with WebBaseLoader and return the loaded documents.

    The unused local ``start`` was removed; the f-string wrapper is reduced
    to an explicit ``str`` coercion with identical behavior.
    """
    loader = WebBaseLoader(str(link))
    return loader.load()
|
deploy/create_retriever.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import glob
|
| 3 |
+
from langchain_community.document_loaders import Docx2txtLoader, TextLoader
|
| 4 |
+
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter
|
| 5 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 6 |
+
from langchain.vectorstores import Chroma
|
| 7 |
+
from langchain.retrievers import EnsembleRetriever
|
| 8 |
+
from ragatouille import RAGPretrainedModel
|
| 9 |
+
|
| 10 |
+
# Function to load and process documents
|
| 11 |
+
def docs_return(flag):
    """Load every .docx/.txt under ``rag_data/`` (deploy variant — no PDFs).

    Returns the flat list of LangChain Document objects when *flag* == 0,
    otherwise one big string joining the first document of each file with
    blank lines. Empty loader results are skipped (the original raised
    IndexError on ``documents[0]`` for an empty file).
    """
    directory_path = 'rag_data/'

    all_doc, all_doc2 = [], []

    for path in glob.glob(directory_path + '*.docx'):
        documents = Docx2txtLoader(path).load()
        all_doc.extend(documents)
        if documents:  # guard against empty loads
            all_doc2.append(str(documents[0].page_content))

    for path in glob.glob(directory_path + '*.txt'):
        documents = TextLoader(path).load()
        all_doc.extend(documents)
        if documents:
            all_doc2.append(str(documents[0].page_content))

    docs = '\n\n'.join(all_doc2)

    return all_doc if flag == 0 else docs
|
| 36 |
+
|
| 37 |
+
# Function to get or download the embedding model
|
| 38 |
+
def get_embedding_model(model_name):
    """Return HuggingFace embeddings, using a local cached copy when available.

    Cached copies live under ``embedding_model/`` with '/' replaced by '_'.
    """
    cache_path = f"embedding_model/{model_name.replace('/', '_')}"
    has_cache = os.path.exists(cache_path)
    if has_cache:
        print(f"Loading local model from {cache_path}")
    else:
        print(f"Downloading model {model_name}")
    return HuggingFaceEmbeddings(model_name=cache_path if has_cache else model_name)
|
| 46 |
+
|
| 47 |
+
# Function to return different types of text splitters
|
| 48 |
+
def get_text_splitter(splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Return the LangChain splitter matching *splitter_type*.

    'character' uses *separator*; 'token' takes *max_tokens* as its chunk
    size. Unknown types raise ValueError.
    """
    if splitter_type == 'character':
        return CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator)
    if splitter_type == 'recursive':
        return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    if splitter_type == 'token':
        return TokenTextSplitter(chunk_size=max_tokens, chunk_overlap=chunk_overlap)
    raise ValueError("Unsupported splitter type. Choose from 'character', 'recursive', or 'token'.")
|
| 57 |
+
|
| 58 |
+
# Retriever using Chroma and HuggingFace embeddings
|
| 59 |
+
def retriever_chroma(flag, model_name="BAAI/bge-large-en-v1.5", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Build (flag falsy) or reopen (flag truthy) a Chroma MMR retriever."""
    # Load or download the embedding model
    embeddings = get_embedding_model(model_name)

    if not flag:
        all_doc = docs_return(0)
        text_splitter = get_text_splitter(splitter_type=splitter_type, chunk_size=chunk_size,
                                          chunk_overlap=chunk_overlap, separator=separator,
                                          max_tokens=max_tokens)
        docs = text_splitter.split_documents(documents=all_doc)
        # Persist the vector store so subsequent runs can reopen it.
        vectordb = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")
    else:
        # BUG FIX(review): ``Chroma.load_local`` does not exist (that is the
        # FAISS API); reopen the persisted store through the constructor.
        vectordb = Chroma(persist_directory="vectorstore", embedding_function=embeddings)

    return vectordb.as_retriever(
        search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}
    )
|
| 88 |
+
|
| 89 |
+
# ColBERT retriever
|
| 90 |
+
def colbert_retriever():
    """Index the concatenated rag_data corpus with ColBERT v2 and wrap it as a LangChain retriever."""
    corpus = docs_return(1)
    rag = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
    rag.index(
        collection=[corpus],
        index_name="ensemble_colbert",
        max_document_length=256,
        split_documents=True,
    )
    return rag.as_langchain_retriever(k=3)
|
| 101 |
+
|
| 102 |
+
# Ensemble retriever
|
| 103 |
+
def ensemble_retriever(model_name="BAAI/bge-large-en-v1.5", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Combine the ColBERT retriever and a freshly-built Chroma retriever with equal weight."""
    colbert = colbert_retriever()
    chroma = retriever_chroma(False, model_name=model_name, splitter_type=splitter_type,
                              chunk_size=chunk_size, chunk_overlap=chunk_overlap,
                              separator=separator, max_tokens=max_tokens)
    return EnsembleRetriever(retrievers=[colbert, chroma], weights=[0.50, 0.50])
|
| 108 |
+
|
| 109 |
+
# Example usage:
|
| 110 |
+
# dat = ensemble_retriever(model_name="sentence-transformers/all-mpnet-base-v2", splitter_type='token', chunk_size=500)
|
| 111 |
+
# data = dat.invoke("What is KUET?")
|
| 112 |
+
# context = ""
|
| 113 |
+
# for x in data[:2]:
|
| 114 |
+
# context += (x.page_content) + "\n"
|
deploy/inference.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import transformers
|
| 5 |
+
from pynvml import *
|
| 6 |
+
import torch
|
| 7 |
+
from langchain import hub
|
| 8 |
+
from model_ret import zephyr_model,llama_model,mistral_model,phi_model,flant5_model
|
| 9 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 10 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 11 |
+
from create_retriever import ensemble_retriever
|
| 12 |
+
# HuggingFace model mapping
|
| 13 |
+
hf_model_map = {
|
| 14 |
+
"Zephyr": "HuggingFaceH4/zephyr-7b-beta",
|
| 15 |
+
"Llama": "NousResearch/Meta-Llama-3-8B",
|
| 16 |
+
"Mistral": "unsloth/mistral-7b-instruct-v0.3",
|
| 17 |
+
"Phi": "microsoft/Phi-3-mini-4k-instruct",
|
| 18 |
+
"Flant5": "google/flan-t5-base"
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
# Model chain class
|
| 22 |
+
class model_chain:
    """Wrap an LLM (local fine-tuned checkpoint or HF hub model) plus an
    ensemble retriever into a single RAG chain.

    Fixes applied:
    - The Flan-T5 path asked a hard-coded "What is KUET?" instead of the
      user's question; it now uses the actual input.
    - ``quantization`` was unbound when ``use_local`` was False, raising
      NameError for online non-Flant5 models; it now defaults to 4
      (matching ``load_model_and_pipeline``'s default — TODO confirm).
    - ``ans.split("Answer:")[1]`` raised IndexError when the marker was
      absent; it is now guarded.
    """

    model_name = ""  # short model family name ("Zephyr", "Llama", ...)

    def __init__(self,
                 model_name_local,
                 model_name_online="Llama",
                 use_local=True,
                 embedding_name="BAAI/bge-base-en-v1.5",
                 splitter_type_dropdown="character",
                 chunk_size_slider=512,
                 chunk_overlap_slider=30,
                 separator_textbox="\n",
                 max_tokens_slider=2048) -> None:
        quantization = 4  # default for online models; overridden for local checkpoints
        if use_local:
            # Local checkpoint names look like "<quantization>_<Family>_...".
            quantization, self.model_name = model_name_local.split("_")[0], model_name_local.split("_")[1]
            model_name_temp = model_name_local
        else:
            self.model_name = model_name_online
            model_name_temp = hf_model_map[model_name_online]

        if self.model_name == "Zephyr":
            self.llm = zephyr_model(model_name_temp, quantization, use_local=use_local)
        elif self.model_name == "Llama":
            self.llm = llama_model(model_name_temp, quantization, use_local=use_local)
        elif self.model_name == "Mistral":
            self.llm = mistral_model(model_name_temp, quantization, use_local=use_local)
        elif self.model_name == "Phi":
            self.llm = phi_model(model_name_temp, quantization, use_local=use_local)
        elif self.model_name == "Flant5":
            # Flan-T5 is a seq2seq model: keep tokenizer/model for the manual path.
            self.tokenizer, self.model, self.llm = flant5_model(model_name_temp, use_local=use_local)

        # Creating the retriever
        self.retriever = ensemble_retriever(embedding_name,
                                            splitter_type=splitter_type_dropdown,
                                            chunk_size=chunk_size_slider,
                                            chunk_overlap=chunk_overlap_slider,
                                            separator=separator_textbox,
                                            max_tokens=max_tokens_slider)

        # Defining the RAG chain
        prompt = hub.pull("rlm/rag-prompt")
        self.rag_chain = (
            {"context": self.retriever | self.format_docs, "question": RunnablePassthrough()}
            | prompt
            | self.llm
            | StrOutputParser()
        )

    def format_docs(self, docs):
        """Join retrieved documents into one blank-line-separated context string."""
        return "\n\n".join(doc.page_content for doc in docs)

    def rag_chain_ret(self):
        """Return the composed RAG chain."""
        return self.rag_chain

    def ans_ret(self, inp, rag_chain):
        """Answer *inp*, via the manual Flan-T5 path or the RAG chain."""
        if self.model_name == 'Flant5':
            data = self.retriever.invoke(inp)
            context = ""
            for x in data[:2]:
                context += x.page_content + "\n"
            # BUG FIX: the original substituted a hard-coded "What is KUET?"
            # for the user's question here.
            prompt_text = f"""Please answer to this question using this context:\n{context}\n{inp}"""
            inputs = self.tokenizer(prompt_text, return_tensors="pt")
            outputs = self.model.generate(**inputs)
            answer = self.tokenizer.decode(outputs[0])
            from textwrap import fill
            return fill(answer, width=100)

        ans = rag_chain.invoke(inp)
        # Strip the echoed prompt only when the "Answer:" marker is present,
        # instead of raising IndexError on its absence.
        if "Answer:" in ans:
            ans = ans.split("Answer:")[1]
        return ans
|
| 97 |
+
|
| 98 |
+
# def model_push(hf):
|
| 99 |
+
# from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 100 |
+
# if model_name=="Mistral":
|
| 101 |
+
# path="models/full_KUET_LLM_mistral"
|
| 102 |
+
# elif model_name=="Zepyhr":
|
| 103 |
+
# path="models/full_KUET_LLM_zepyhr"
|
| 104 |
+
# elif model_name=="Llama2":
|
| 105 |
+
# path="models/full_KUET_LLM_llama"
|
| 106 |
+
# tokenizer = AutoTokenizer.from_pretrained(path)
|
| 107 |
+
# model = AutoModelForCausalLM.from_pretrained(path,
|
| 108 |
+
# device_map='auto',
|
| 109 |
+
# torch_dtype=torch.float16,
|
| 110 |
+
# use_auth_token=True,
|
| 111 |
+
# load_in_8bit=True,
|
| 112 |
+
# # load_in_4bit=True
|
| 113 |
+
# )
|
| 114 |
+
# model.push_to_hub(repo_id=f"My_model",token=hf)
|
| 115 |
+
# tokenizer.push_to_hub(repo_id=f"My_model",token=hf)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
|
deploy/main.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
from utils import load_params_from_file
|
| 4 |
+
from inference import model_chain
|
| 5 |
+
|
| 6 |
+
infer_ragchain = None
|
| 7 |
+
|
| 8 |
+
# Define the main interface logic
|
| 9 |
+
def echo(message, history, model_name_local, model_name_online,
         inf_checkbox, embedding_name, splitter_type_dropdown, chunk_size_slider,
         chunk_overlap_slider, separator_textbox, max_tokens_slider):
    """Chat callback: lazily build the RAG chain once, then answer *message*.

    The chain is cached in the module-global ``infer_ragchain`` so the model
    loads only on the first message.
    """
    global infer_ragchain
    if infer_ragchain is None:
        gr.Info("Please wait!!! model is loading!!")
        if inf_checkbox:
            # BUG FIX: ``gr.info`` does not exist — the helper is ``gr.Info``
            # (the original would raise AttributeError here).
            gr.Info("local model is loading!!")
        infer_ragchain = model_chain(model_name_local, model_name_online,
                                     inf_checkbox, embedding_name, splitter_type_dropdown,
                                     chunk_size_slider, chunk_overlap_slider,
                                     separator_textbox, max_tokens_slider)
    rag_chain = infer_ragchain.rag_chain_ret()
    return infer_ragchain.ans_ret(message, rag_chain)
|
| 22 |
+
|
| 23 |
+
# Load saved parameters if available
|
| 24 |
+
saved_params = load_params_from_file()
|
| 25 |
+
|
| 26 |
+
# Set default values
|
| 27 |
+
default_embedding_name = saved_params['embedding_name'] if saved_params else "BAAI/bge-base-en-v1.5"
|
| 28 |
+
# UI defaults: restored from saved_params (persisted settings, loaded above)
# when a previous configuration exists, otherwise hard-coded fallbacks.
default_splitter_type = saved_params['splitter_type_dropdown'] if saved_params else "character"
default_chunk_size = saved_params['chunk_size_slider'] if saved_params else 500
default_chunk_overlap = saved_params['chunk_overlap_slider'] if saved_params else 30
default_separator = saved_params['separator_textbox'] if saved_params else "\n"
default_max_tokens = saved_params['max_tokens_slider'] if saved_params else 1000

# Initialize the Gradio Interface
with gr.Blocks() as demo:
    with gr.Tab("Inference"):
        with gr.Row():
            # Retrieval settings: embedding model plus text-splitting controls.
            embedding_name = gr.Dropdown(choices=["BAAI/bge-base-en-v1.5", "dunzhang/stella_en_1.5B_v5", "dunzhang/stella_en_400M_v5",
                                                  "nvidia/NV-Embed-v2", "Alibaba-NLP/gte-Qwen2-1.5B-instruct"],
                                         value=default_embedding_name, label="Select the Embedding Model")
            splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
                                                 value=default_splitter_type, label="Splitter Type", interactive=True)

            chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=default_chunk_size, step=50, label="Chunk Size")
            chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=default_chunk_overlap, step=10, label="Chunk Overlap", interactive=True)
            separator_textbox = gr.Textbox(value=default_separator, label="Separator (e.g., newline '\\n')", interactive=True)
            max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=default_max_tokens, step=100, label="Max Tokens", interactive=True)

            # LLM source: either a local fine-tuned checkpoint (listed from the
            # models/ directory) or a hosted Hugging Face model.  Exactly one
            # of the two dropdowns is visible at any time.
            inf_checkbox = gr.Checkbox(label="Do you want to use a fine-tuned model?")
            model_name_local = gr.Dropdown(visible=False)
            model_name_online = gr.Dropdown(choices=["Zephyr", "Llama", "Mistral", "Phi", "Flant5"],
                                            label="Select the LLM from Huggingface", visible=True)

        # Function to toggle model selection between local and online based on checkbox
        def model_online_local_show(inf_checkbox):
            if inf_checkbox:
                # Fine-tuned: populate choices from the models/ directory.
                return [gr.Dropdown(choices=os.listdir("models"), label="Select the local LLM", visible=True),
                        gr.Dropdown(visible=False)]
            else:
                return [gr.Dropdown(visible=False),
                        gr.Dropdown(choices=["Zephyr", "Llama", "Mistral", "Phi", "Flant5"],
                                    label="Select the LLM from Huggingface", visible=True)]

        # Event listener to switch between local and online models
        inf_checkbox.change(model_online_local_show, [inf_checkbox], [model_name_local, model_name_online])

        # Chat interface; `echo` (defined earlier in this file) receives the
        # user message plus every retrieval/model setting as additional inputs.
        gr.ChatInterface(fn=echo,
                         additional_inputs=[model_name_local, model_name_online, inf_checkbox, embedding_name,
                                            splitter_type_dropdown, chunk_size_slider,
                                            chunk_overlap_slider, separator_textbox, max_tokens_slider],
                         title="Chatbot")

# Launch the demo
demo.launch()
|
deploy/model_ret.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, T5ForConditionalGeneration, pipeline
|
| 2 |
+
from langchain import HuggingFacePipeline
|
| 3 |
+
import os
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
def load_model_and_pipeline(model_info, quantization=4, is_t5=False, use_local=True):
    """Load a tokenizer + model and wrap them in a LangChain HuggingFacePipeline.

    Args:
        model_info: Hugging Face repo id or local path of the checkpoint.
        quantization: 8 (or "8") for 8-bit loading; anything else selects
            4-bit.  Only applies to the causal-LM path.
        is_t5: load a seq2seq Flan-T5 model instead of a causal LM.
        use_local: kept for interface compatibility; the local-path fallback
            logic it used to control is currently disabled.

    Returns:
        (tokenizer, model, llm) where llm is a HuggingFacePipeline ready for
        LangChain chains.
    """
    if is_t5:
        # Seq2seq path.  BUG FIX: the original first loaded `model_info` as a
        # quantized causal LM and then threw it away -- wasting GPU memory and
        # failing outright for T5 checkpoints (AutoModelForCausalLM cannot
        # load T5).  Load the seq2seq model directly instead.
        model = T5ForConditionalGeneration.from_pretrained(model_info)
        tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_info, use_auth_token=True)

        # BUG FIX: `quantization` defaults to the int 4 but was compared
        # against the string "8", so integer callers could never get 8-bit
        # loading.  Normalize via str() to accept both 8 and "8".
        if str(quantization) == "8":
            quant_kwargs = {"load_in_8bit": True}
        else:
            quant_kwargs = {"load_in_4bit": True}

        model = AutoModelForCausalLM.from_pretrained(
            model_info,
            device_map='auto',
            torch_dtype=torch.float16,
            use_auth_token=True,
            **quant_kwargs,
        )

    # NOTE(review): for is_t5 the "text-generation" task is questionable; T5
    # models normally use "text2text-generation" -- confirm before relying on
    # the T5 path.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id
    )

    llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
    return tokenizer, model, llm
|
| 62 |
+
|
| 63 |
+
def zephyr_model(model_info, quantization, use_local=True):
    """Load a Zephyr checkpoint; thin wrapper over load_model_and_pipeline."""
    options = {"use_local": use_local}
    return load_model_and_pipeline(model_info, quantization, **options)
|
| 65 |
+
|
| 66 |
+
def llama_model(model_info, quantization, use_local=True):
    """Load a Llama checkpoint; thin wrapper over load_model_and_pipeline."""
    options = {"use_local": use_local}
    return load_model_and_pipeline(model_info, quantization, **options)
|
| 68 |
+
|
| 69 |
+
def mistral_model(model_info, quantization, use_local=True):
    """Load a Mistral checkpoint; thin wrapper over load_model_and_pipeline."""
    options = {"use_local": use_local}
    return load_model_and_pipeline(model_info, quantization, **options)
|
| 71 |
+
|
| 72 |
+
def phi_model(model_info, quantization, use_local=True):
    """Load a Phi checkpoint; thin wrapper over load_model_and_pipeline."""
    options = {"use_local": use_local}
    return load_model_and_pipeline(model_info, quantization, **options)
|
| 74 |
+
|
| 75 |
+
def flant5_model(model_info, use_local=True):
    """Load a Flan-T5 checkpoint (seq2seq path) via load_model_and_pipeline."""
    options = {"is_t5": True, "use_local": use_local}
    return load_model_and_pipeline(model_info, **options)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
import pandas as pd
|
| 80 |
+
from datasets import Dataset
|
| 81 |
+
|
| 82 |
+
def calculate_rag_metrics(model_ques_ans_gen, llm_model, embedding_model="BAAI/bge-base-en-v1.5"):
    """Evaluate RAG question/answer records with ragas.

    Args:
        model_ques_ans_gen: list of dicts, each with keys 'question',
            'answer', 'contexts', and 'ground_truths'.
        llm_model: LLM used by ragas as the judge model.
        embedding_model: embedding model (name or object) used by
            similarity-based metrics.

    Returns:
        The ragas evaluation Result (call .to_pandas() on it for a DataFrame).
    """
    # Imported lazily so the module loads even when ragas is not installed.
    from ragas import evaluate
    from ragas.metrics import (
        faithfulness,
        answer_correctness,
        answer_similarity,
        answer_relevancy,
        context_recall,
        context_precision,
    )

    # Column-wise view of the per-question records.
    data_samples = {
        'question': [item['question'] for item in model_ques_ans_gen],
        'answer': [item['answer'] for item in model_ques_ans_gen],
        'contexts': [item['contexts'] for item in model_ques_ans_gen],
        'ground_truths': [item['ground_truths'] for item in model_ques_ans_gen],
    }

    # Convert to a HuggingFace Dataset, which is what ragas.evaluate consumes.
    rag_df = pd.DataFrame(data_samples)
    rag_eval_dataset = Dataset.from_pandas(rag_df)

    # BUG FIX: the original passed a list of metric *names* (strings) while
    # the imported metric objects went unused; ragas.evaluate expects the
    # metric objects themselves.
    metrics = [
        answer_correctness, answer_similarity,
        answer_relevancy, faithfulness,
        context_recall, context_precision,
    ]

    result = evaluate(
        rag_eval_dataset,
        metrics=metrics,
        llm=llm_model,
        embeddings=embedding_model,
    )
    # Dropped a dead `result.to_pandas()` call whose return value was
    # discarded; callers can convert the returned result themselves.
    return result
|
deploy/params.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "M1", "embedding_name": "BAAI/bge-base-en-v1.5", "splitter_type_dropdown": "character", "chunk_size_slider": 500, "chunk_overlap_slider": 30, "separator_textbox": "\n", "max_tokens_slider": 1000}
|
embedding_tuner.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datasets import load_dataset, Dataset
|
| 2 |
+
from sentence_transformers import (
|
| 3 |
+
SentenceTransformer,
|
| 4 |
+
SentenceTransformerTrainer,
|
| 5 |
+
SentenceTransformerTrainingArguments,
|
| 6 |
+
)
|
| 7 |
+
from sentence_transformers.losses import (
|
| 8 |
+
MultipleNegativesRankingLoss,
|
| 9 |
+
OnlineContrastiveLoss,
|
| 10 |
+
CoSENTLoss,
|
| 11 |
+
GISTEmbedLoss,
|
| 12 |
+
TripletLoss,
|
| 13 |
+
)
|
| 14 |
+
import pandas as pd
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class EmbeddingFinetuner:
    """
    Finetunes a SentenceTransformer model with one of several loss functions.

    Supported losses, and the column layout "data/emb_data.xlsx" must have
    for each (the spreadsheet is consumed as-is in every case):

    - MultipleNegativesRankingLoss: | anchor | positive | negative |
    - OnlineContrastiveLoss:        | sentence1 | sentence2 | label |  (label: 1 similar / 0 dissimilar)
    - CoSENTLoss:                   | sentence1 | sentence2 | score |  (score: float similarity, e.g. 0..1)
    - GISTEmbedLoss:                | anchor | positive | [negative] | (negative column optional)
    - TripletLoss:                  | anchor | positive | negative |

    'anchor' is the sentence to be embedded, 'positive' a semantically
    similar sentence, and 'negative' a semantically dissimilar one.
    """

    # Single source of truth for validation in _load_data and dispatch in
    # _get_loss_function (the original duplicated this knowledge in two
    # parallel if/elif chains).
    _SUPPORTED_LOSSES = (
        "MultipleNegativesRankingLoss",
        "OnlineContrastiveLoss",
        "CoSENTLoss",
        "GISTEmbedLoss",
        "TripletLoss",
    )

    def __init__(
        self,
        model_name="microsoft/mpnet-base",
        loss_function="MultipleNegativesRankingLoss",
        epochs=1,
        batch_size=16,
        test_size=0.1,
    ):
        """
        Initializes the EmbeddingFinetuner.

        Args:
            model_name (str): Name of the SentenceTransformer model to use.
            loss_function (str): Name of the loss function to use (see class
                docstring for supported values).
            epochs (int): Number of training epochs.
            batch_size (int): Batch size for training and evaluation.
            test_size (float): Proportion of the dataset used as the dev split.

        Raises:
            ValueError: if loss_function is not one of the supported losses.
        """
        self.model_name = model_name
        self.loss_function = loss_function
        self.epochs = epochs
        self.batch_size = batch_size
        self.test_size = test_size

        self.model = SentenceTransformer(self.model_name)
        self.train_dataset, self.dev_dataset, self.test_dataset = self._load_data()
        self.loss = self._get_loss_function()

    def _load_data(self):
        """
        Loads "data/emb_data.xlsx" and splits it into train/dev datasets.

        Returns:
            (train_dataset, dev_dataset, test_dataset); test_dataset is
            always None (no held-out test split is created).
        """
        if self.loss_function not in self._SUPPORTED_LOSSES:
            raise ValueError(f"Unsupported loss function: {self.loss_function}")

        # Every supported loss consumes the spreadsheet unchanged -- only the
        # expected column layout differs (documented on the class) -- so one
        # construction path replaces five byte-identical branches.  Also
        # dropped a pointless f-string prefix on the constant path.
        df = pd.read_excel("data/emb_data.xlsx")
        dataset = Dataset.from_pandas(df)

        # Split into train and dev.
        train_dev_dataset = dataset.train_test_split(test_size=self.test_size)
        return train_dev_dataset["train"], train_dev_dataset["test"], None

    def _get_loss_function(self):
        """
        Returns the selected loss function instance.

        Raises:
            ValueError: if the configured loss function is unsupported.
        """
        if self.loss_function == "GISTEmbedLoss":
            # GISTEmbedLoss additionally requires a small "guide" model.
            guide_model = SentenceTransformer("all-MiniLM-L6-v2")  # You can change this
            return GISTEmbedLoss(self.model, guide_model)

        # The remaining losses all take only the model being trained.
        simple_losses = {
            "MultipleNegativesRankingLoss": MultipleNegativesRankingLoss,
            "OnlineContrastiveLoss": OnlineContrastiveLoss,
            "CoSENTLoss": CoSENTLoss,
            "TripletLoss": TripletLoss,
        }
        try:
            return simple_losses[self.loss_function](self.model)
        except KeyError:
            raise ValueError(f"Unsupported loss function: {self.loss_function}") from None

    def train(self):
        """
        Trains the SentenceTransformer model using the configured loss and
        saves the result under models/emb-<model_name>-<loss_function>.

        Returns:
            True on completion.
        """
        args = SentenceTransformerTrainingArguments(
            output_dir=f"models/{self.model_name}-{self.loss_function}",
            num_train_epochs=self.epochs,
            per_device_train_batch_size=self.batch_size,
            per_device_eval_batch_size=self.batch_size,
            evaluation_strategy="epoch",
            # ... other training arguments as needed ...
        )

        trainer = SentenceTransformerTrainer(
            model=self.model,
            args=args,
            train_dataset=self.train_dataset,
            eval_dataset=self.dev_dataset,
            loss=self.loss,
        )
        trainer.train()

        # Save the trained model.
        self.model.save_pretrained(
            f"models/emb-{self.model_name}-{self.loss_function}"
        )

        return True
|
| 201 |
+
|
| 202 |
+
|
fine_tune_file/finetune_file.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#code changed
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
from datasets import load_dataset, Dataset
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import transformers
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
| 8 |
+
from trl import SFTTrainer
|
| 9 |
+
import transformers
|
| 10 |
+
# from peft import AutoPeftModelForCausalLM
|
| 11 |
+
from transformers import GenerationConfig
|
| 12 |
+
from pynvml import *
|
| 13 |
+
import glob
|
| 14 |
+
class custom_model_trainer:
    """Template trainer: LoRA-finetune a causal LM on question/answer pairs.

    Reads data/finetune_data.xlsx (columns: question, answer), renders each
    row through the tokenizer's chat template, trains a LoRA adapter, then
    merges the adapter into the base model and saves everything under models/.
    """

    # Default hyperparameters -- set these up according to your model.
    lr = 5e-6                   # learning rate
    epoch = 3                   # number of training epochs
    batch_size = 4              # per-device train batch size
    gradient_accumulation = 4   # gradient accumulation steps
    quantization = 4            # 4- or 8-bit base-model loading
    lora_r = 16                 # LoRA rank
    lora_alpha = 32             # LoRA alpha
    lora_dropout = 0.05         # LoRA dropout

    def formatted_text(self, x, tokenizer):
        """Render one (question, answer) dataframe row through the model's
        chat template. Change this template according to your model."""
        temp = [
            {"role": "user", "content": """You are a helpful chatbot, help users by answering their queries.
    Question: """ + x["question"]},
            {"role": "assistant", "content": x["answer"]}
        ]
        return tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)

    def custom_model_finetune(self):
        """Run the full finetune: load data and base model, train the LoRA
        adapter, merge it into the base model, and save both outputs."""
        base_model = 'Qwen/Qwen2.5-0.5B-Instruct'  # base model repo name from Hugging Face
        lora_output = f'models/q{self.quantization}_mymodel_lora'  # LoRA adapter output folder
        full_output = f'models/q{self.quantization}_mymodel_full'  # merged full-model output folder

        tokenizer = AutoTokenizer.from_pretrained(base_model)
        tokenizer.padding_side = 'right'

        # Read the (question, answer) pairs and render the training text column.
        data_location = r"data/finetune_data.xlsx"  # replace here if needed
        data_df = pd.read_excel(data_location)
        data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x, tokenizer), axis=1)
        print(data_df.iloc[0])
        dataset = Dataset.from_pandas(data_df)

        # Quantization config.  BUG FIX: the original compared the int class
        # attribute `quantization` (default 4) against the string '8', so the
        # 8-bit branch was unreachable; normalize via str() to accept 8 or '8'.
        if str(self.quantization) == '8':
            bnb_config = BitsAndBytesConfig(
                load_in_8bit=True,
            )
        else:
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16
            )
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            quantization_config=bnb_config,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
        )
        model.config.use_cache = False  # silence the warnings
        model.config.pretraining_tp = 1
        model.gradient_checkpointing_enable()

        # Reload the tokenizer with training-specific settings.
        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        tokenizer.padding_side = 'right'
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.add_eos_token = True

        # Set PEFT adapter config (16:32).
        from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

        config = LoraConfig(
            r=self.lora_r if self.lora_r else 16,
            lora_alpha=self.lora_alpha if self.lora_alpha else 32,
            # Target all the linear layers for full finetuning.
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            lora_dropout=self.lora_dropout if self.lora_dropout else 0.05,
            bias="none",
            task_type="CAUSAL_LM")

        # Stabilize output layer and layernorms.  NOTE(review): the original
        # passed self.quantization as the second positional argument, which is
        # `use_gradient_checkpointing` (a bool); any truthy value behaves like
        # the default True, so the stray argument is dropped.
        model = prepare_model_for_kbit_training(model)
        # Set PEFT adapter on model (last step).
        model = get_peft_model(model, config)

        # Hyperparameters (class-attribute values with hard fallbacks).
        BATCH_SIZE = self.batch_size if self.batch_size else 4
        GRAD_ACC = self.gradient_accumulation if self.gradient_accumulation else 4
        OPTIMIZER = 'paged_adamw_8bit'  # save memory
        LR = self.lr if self.lr else 5e-06  # slightly smaller than pretraining lr | close to LoRA standard

        training_config = transformers.TrainingArguments(
            per_device_train_batch_size=BATCH_SIZE,
            gradient_accumulation_steps=GRAD_ACC,
            optim=OPTIMIZER,
            learning_rate=LR,
            fp16=True,  # consider compatibility when using bf16
            logging_steps=10,
            num_train_epochs=self.epoch if self.epoch else 2,
            output_dir=lora_output,
            remove_unused_columns=True,
        )

        # Set collator.
        data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

        # Setup trainer.
        trainer = SFTTrainer(
            model=model,
            train_dataset=dataset,
            data_collator=data_collator,
            args=training_config,
            dataset_text_field="text",
            # callbacks=[early_stop], need to learn, lora easily overfits
        )

        trainer.train()
        trainer.save_model(lora_output)

        # Reload the base model and merge the trained LoRA adapter into it.
        from peft import PeftConfig, PeftModel
        config = PeftConfig.from_pretrained(lora_output)
        model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
        model = PeftModel.from_pretrained(model, lora_output)

        tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        merged_model = model.merge_and_unload()

        merged_model.save_pretrained(full_output)
        tokenizer.save_pretrained(full_output)
        print("*" * 10, ": Model is saved!!!")
|
fine_tune_file/flant5_finetune.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import nltk
|
| 2 |
+
import evaluate
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from datasets import load_dataset, Dataset
|
| 6 |
+
from transformers import T5Tokenizer, DataCollatorForSeq2Seq
|
| 7 |
+
from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
|
| 8 |
+
import os
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
os.environ["WANDB_DISABLED"] = "true"
|
| 11 |
+
|
| 12 |
+
class flant5_trainer:
    """Finetune google/flan-t5-base on question/answer pairs with ROUGE eval.

    Data is read from data/finetune_data.xlsx (columns: question, answer);
    the trained model and tokenizer are saved under models/.
    """

    def __init__(self) -> None:
        """Load the tokenizer, model, data collator, and ROUGE metric once."""
        MODEL_NAME = "google/flan-t5-base"
        self.tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
        self.model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)
        self.data_collator = DataCollatorForSeq2Seq(tokenizer=self.tokenizer, model=self.model)
        nltk.download("punkt", quiet=True)  # sentence splitter used in compute_metrics
        self.metric = evaluate.load("rouge")

    def preprocess_function(self, examples):
        """Add the task prefix, tokenize questions, and set tokenized answers
        as labels.

        Depends on self.prefix, which is assigned in flant5_finetune() before
        this function is mapped over the dataset.
        """
        # The "inputs" are the prefixed, tokenized questions:
        inputs = [self.prefix + doc for doc in examples["question"]]
        model_inputs = self.tokenizer(inputs, max_length=1024, truncation=True)

        # The "labels" are the tokenized answers:
        labels = self.tokenizer(text_target=examples["answer"],
                                max_length=1024,
                                truncation=True)

        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    def compute_metrics(self, eval_preds):
        """Decode predictions/labels and compute ROUGE scores."""
        preds, labels = eval_preds

        # Replace -100 (ignored label positions) with the pad token so the
        # tokenizer can decode, then decode both sides.
        labels = np.where(labels != -100, labels, self.tokenizer.pad_token_id)
        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)

        # rougeLSum expects a newline after each sentence.
        decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
        decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]

        return self.metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    def flant5_finetune(self, lr, epoch, batch_size, gradient_accumulation,
                        quantization, lora_r, lora_alpha, lora_dropout):
        """Train flan-t5-base and save the model + tokenizer under models/.

        quantization, lora_r, lora_alpha, and lora_dropout are accepted for
        signature compatibility with the other trainers but are unused here
        (the T5 path does neither quantization nor LoRA).
        """
        data_location = r"data/finetune_data.xlsx"
        data_df = pd.read_excel(data_location)
        dataset = Dataset.from_pandas(data_df)

        # We prefix our tasks with "answer the question".
        self.prefix = "Please answer this question: "

        # Split, then map the preprocessing function across the dataset.
        dataset = dataset.train_test_split(test_size=0.1)
        tokenized_dataset = dataset.map(self.preprocess_function, batched=True)

        # Global parameters.
        PER_DEVICE_EVAL_BATCH = 4
        WEIGHT_DECAY = 0.01
        SAVE_TOTAL_LIM = 3

        # Set up training arguments.
        training_args = Seq2SeqTrainingArguments(
            output_dir="./results",
            evaluation_strategy="epoch",
            learning_rate=lr,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
            weight_decay=WEIGHT_DECAY,
            save_total_limit=SAVE_TOTAL_LIM,
            num_train_epochs=epoch,
            predict_with_generate=True,
            push_to_hub=False,
            gradient_accumulation_steps=gradient_accumulation
        )
        trainer = Seq2SeqTrainer(
            model=self.model,
            args=training_args,
            train_dataset=tokenized_dataset["train"],
            eval_dataset=tokenized_dataset["test"],
            tokenizer=self.tokenizer,
            data_collator=self.data_collator,
            compute_metrics=self.compute_metrics
        )
        trainer.train()

        current_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        full_output = f'models/0_flant5_{current_time}'
        # BUG FIX: the original saved the model twice back-to-back and never
        # saved the tokenizer; save the tokenizer instead so the output
        # directory is loadable end-to-end.
        self.model.save_pretrained(full_output)
        self.tokenizer.save_pretrained(full_output)
        print("*" * 10, ": Model is saved!!!")
|
fine_tune_file/llama_finetune.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
from datasets import load_dataset, Dataset
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import transformers
|
| 6 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
| 7 |
+
from trl import SFTTrainer
|
| 8 |
+
import transformers
|
| 9 |
+
# from peft import AutoPeftModelForCausalLM
|
| 10 |
+
from transformers import GenerationConfig
|
| 11 |
+
from pynvml import *
|
| 12 |
+
import glob
|
| 13 |
+
class llama_trainer:
|
| 14 |
+
def llama_model(self, lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout):
    """QLoRA-finetune a Llama base model on data/finetune_data.xlsx.

    Trains LoRA adapters on a quantized base model, then reloads the base in
    full precision, merges the adapters, and saves the merged model + tokenizer.

    Args:
        lr: learning rate (falls back to 5e-06 when falsy).
        epoch: number of training epochs (falls back to 2).
        batch_size: per-device batch size (falls back to 4).
        gradient_accumulation: gradient accumulation steps (falls back to 4).
        quantization: 4 selects 4-bit NF4; any other value uses 8-bit.
        lora_r / lora_alpha / lora_dropout: LoRA hyperparameters
            (fall back to 16 / 32 / 0.05).
    """
    from datetime import datetime
    # base_model = "NousResearch/Llama-2-7b-chat-hf"
    base_model = "NousResearch/Meta-Llama-3-8B"
    stamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    lora_output = f'models/{quantization}_Llama_lora_{stamp}'
    full_output = f'models/{quantization}_Llama_full_{stamp}'

    # Quantization config: 4-bit NF4 when requested, otherwise 8-bit.
    # (Previously an 8-bit config was always built and then conditionally replaced.)
    if quantization == 4:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
    else:
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)

    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    model.config.use_cache = False  # silence warnings with gradient checkpointing
    model.config.pretraining_tp = 1
    model.gradient_checkpointing_enable()

    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
    tokenizer.padding_side = 'right'
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.add_eos_token = True
    # NOTE: removed a stray no-op expression (`tokenizer.add_eos_token` on its own line).

    data_location = r"data/finetune_data.xlsx"  ## replace here
    data_df = pd.read_excel(data_location)
    # Vectorized prompt formatting instead of a per-row loop.
    data_df["Text"] = ("### Instruction:" + data_df["question"].astype(str)
                       + "### Response:" + data_df["answer"].astype(str))
    dataset = Dataset.from_pandas(data_df)

    # Set PEFT adapter config (16:32)
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

    config = LoraConfig(
        r=lora_r if lora_r else 16,
        lora_alpha=lora_alpha if lora_alpha else 32,
        # target all the linear layers for full finetuning
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_dropout=lora_dropout if lora_dropout else 0.05,
        bias="none",
        task_type="CAUSAL_LM")

    # stabilize output layer and layernorms, then attach the adapters
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, config)

    training_config = transformers.TrainingArguments(
        per_device_train_batch_size=batch_size if batch_size else 4,
        gradient_accumulation_steps=gradient_accumulation if gradient_accumulation else 4,
        optim='paged_adamw_8bit',  # paged optimizer saves memory
        learning_rate=lr if lr else 5e-06,  # slightly smaller than pretraining lr
        fp16=True,  # consider compatibility when using bf16
        logging_steps=10,
        num_train_epochs=epoch if epoch else 2,
        output_dir=lora_output,
        remove_unused_columns=True,
    )

    data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        data_collator=data_collator,
        args=training_config,
        dataset_text_field="Text",
        # callbacks=[early_stop], need to learn, lora easily overfits
    )

    trainer.train()
    trainer.save_model(lora_output)

    # Reload the base model in full precision and merge the LoRA weights into it.
    from peft import PeftConfig, PeftModel
    config = PeftConfig.from_pretrained(lora_output)
    model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(model, lora_output)
    tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    merged_model = model.merge_and_unload()

    merged_model.save_pretrained(full_output)
    tokenizer.save_pretrained(full_output)
    print("*" * 10, ": Model is saved!!!")
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
if __name__ == "__main__":
    # Run the fine-tune only when executed as a script, not on import.
    lm = llama_trainer()
    lm.llama_model(5e-6, 2, 4, 4, 8, 16, 32, .05)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
|
fine_tune_file/mistral_finetune.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %% Saved
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
from datasets import load_dataset, Dataset
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import transformers
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
| 8 |
+
from trl import SFTTrainer
|
| 9 |
+
import transformers
|
| 10 |
+
# from peft import AutoPeftModelForCausalLM
|
| 11 |
+
from transformers import GenerationConfig
|
| 12 |
+
from pynvml import *
|
| 13 |
+
import glob
|
| 14 |
+
class mistral_trainer:
    """QLoRA fine-tuning helper for a Mistral instruct base model."""

    def formatted_text(self, x, tokenizer):
        """Render one question/answer row into the tokenizer's chat format.

        Args:
            x: mapping with 'question' and 'answer' keys.
            tokenizer: tokenizer providing apply_chat_template.
        Returns:
            The untokenized chat-template string for the conversation.
        """
        temp = [
            # {"role": "system", "content": "Answer as a medical assistant. Respond concisely."},
            {"role": "user", "content": """You are a helpful chatbot, help users by answering their queries.
            Question: """ + x["question"]},
            {"role": "assistant", "content": x["answer"]}
        ]
        return tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)

    def mistral_finetune(self, lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout):
        """Train LoRA adapters on a quantized Mistral model, then merge and save.

        Falsy hyperparameters fall back to defaults: lr 5e-06, epoch 2,
        batch_size 4, gradient_accumulation 4, lora_r 16, lora_alpha 32,
        lora_dropout 0.05. quantization == 4 selects 4-bit NF4, else 8-bit.
        """
        from datetime import datetime
        # base_model = "mistralai/Mistral-7B-Instruct-v0.2"
        base_model = "unsloth/mistral-7b-instruct-v0.3"
        stamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        lora_output = f'models/{quantization}_Mistral_lora_{stamp}'
        full_output = f'models/{quantization}_Mistral_full_{stamp}'

        # Load the tokenizer once (the original loaded it twice) and removed a
        # no-op tuple expression (`tokenizer.add_bos_token, tokenizer.add_eos_token`).
        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        tokenizer.padding_side = 'right'
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.add_eos_token = True

        ### read file with question/answer pairs and render chat-format text
        data_location = r"data/finetune_data.xlsx"  ## replace here
        data_df = pd.read_excel(data_location)
        data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x, tokenizer), axis=1)
        print(data_df.iloc[0])
        dataset = Dataset.from_pandas(data_df)

        # Quantization config: 4-bit NF4 when requested, otherwise 8-bit.
        if quantization == 4:
            print("*" * 10, ": 4 bit quantization")
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
            )
        else:
            bnb_config = BitsAndBytesConfig(load_in_8bit=True)

        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            quantization_config=bnb_config,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
        )
        model.config.use_cache = False  # silence the warnings
        model.config.pretraining_tp = 1
        model.gradient_checkpointing_enable()

        # Set PEFT adapter config (16:32)
        from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

        config = LoraConfig(
            r=lora_r if lora_r else 16,
            lora_alpha=lora_alpha if lora_alpha else 32,
            # target all the linear layers for full finetuning
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            lora_dropout=lora_dropout if lora_dropout else 0.05,
            bias="none",
            task_type="CAUSAL_LM")

        # stabilize output layer and layernorms, then attach the adapters
        model = prepare_model_for_kbit_training(model)
        model = get_peft_model(model, config)

        training_config = transformers.TrainingArguments(
            per_device_train_batch_size=batch_size if batch_size else 4,
            gradient_accumulation_steps=gradient_accumulation if gradient_accumulation else 4,
            optim='paged_adamw_8bit',  # save memory
            learning_rate=lr if lr else 5e-06,
            fp16=True,  # consider compatibility when using bf16
            logging_steps=10,
            num_train_epochs=epoch if epoch else 2,
            output_dir=lora_output,
            remove_unused_columns=True,
        )

        data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

        trainer = SFTTrainer(
            model=model,
            train_dataset=dataset,
            data_collator=data_collator,
            args=training_config,
            dataset_text_field="text",
            # callbacks=[early_stop], need to learn, lora easily overfits
        )

        trainer.train()
        print("*" * 10, ": Finetune ended!!!!")
        trainer.save_model(lora_output)

        # Reload the base model in full precision and merge the LoRA weights.
        from peft import PeftConfig, PeftModel
        config = PeftConfig.from_pretrained(lora_output)
        model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
        model = PeftModel.from_pretrained(model, lora_output)
        tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        merged_model = model.merge_and_unload()

        merged_model.save_pretrained(full_output)
        tokenizer.save_pretrained(full_output)
        print("*" * 10, ": Model is saved!!!")
| 136 |
+
|
| 137 |
+
if __name__ == "__main__":
    # Run the fine-tune only when executed as a script, not on import.
    mis = mistral_trainer()
    mis.mistral_finetune(5e-6, 2, 4, 4, 8, 16, 32, .05)
|
fine_tune_file/modular_finetune.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from datasets import Dataset
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import transformers
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, T5Tokenizer, T5ForConditionalGeneration
|
| 8 |
+
from trl import SFTTrainer
|
| 9 |
+
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftConfig, PeftModel
|
| 10 |
+
import os
|
| 11 |
+
# cache_folder="/mnt/FA00A16100A1259B/shakib/model_cache"
|
| 12 |
+
# os.environ['HF_HOME'] = cache_folder
|
| 13 |
+
# os.environ['HF_DATASETS_CACHE'] = cache_folder
|
| 14 |
+
# os.environ['TRANSFORMERS_CACHE'] = cache_folder
|
| 15 |
+
# print("*"*20)
|
| 16 |
+
# print(os.environ['HF_HOME'])
|
| 17 |
+
# print(os.environ['HF_DATASETS_CACHE'])
|
| 18 |
+
# print(os.environ['TRANSFORMERS_CACHE'])
|
| 19 |
+
# print(os.path.isdir(cache_folder))
|
| 20 |
+
# assert(False)
|
| 21 |
+
class BaseTrainer:
    """Shared QLoRA fine-tuning pipeline; subclasses supply the model loader."""

    def __init__(self, model_name, base_model):
        self.model_name = model_name   # short label used in output paths
        self.base_model = base_model   # Hugging Face hub id of the base checkpoint
        self.tokenizer = self.load_tokenizer()
        self.model = self.load_model()

    def load_tokenizer(self):
        """Default causal-LM tokenizer setup; subclasses may override (e.g. T5)."""
        tokenizer = AutoTokenizer.from_pretrained(self.base_model, trust_remote_code=True)
        tokenizer.padding_side = 'right'
        tokenizer.pad_token = tokenizer.eos_token
        return tokenizer

    def load_model(self):
        raise NotImplementedError("Subclasses must implement load_model method")

    def preprocess_function(self, examples):
        raise NotImplementedError("Subclasses must implement preprocess_function method")

    def formatted_text(self, x):
        """Render one question/answer row with the tokenizer's own chat template.

        Fix: the previous version overwrote tokenizer.chat_template with a dict
        and passed chat_template='content' to apply_chat_template, which rendered
        the literal string 'content' instead of the conversation.
        """
        temp = [
            {"role": "user", "content": f"You are a helpful chatbot, help users by answering their queries.\nQuestion: {x['question']}"},
            {"role": "assistant", "content": x["answer"]}
        ]
        return self.tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)

    def train(self, lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout):
        """Train LoRA adapters on the loaded model, then merge and save.

        Falsy hyperparameters fall back to defaults (lr 5e-6, epoch 2,
        batch_size 4, gradient_accumulation 4, lora_r 16, lora_alpha 32,
        lora_dropout 0.05).

        Raises:
            FileNotFoundError: if neither the CSV nor the XLSX data file exists
                (previously data_df was left unbound and a NameError followed).
        """
        csv_file = "data/finetune_data.csv"
        xlsx_file = "data/finetune_data.xlsx"
        if os.path.exists(csv_file):
            data_df = pd.read_csv(csv_file)
            print("Reading CSV file...")
        elif os.path.exists(xlsx_file):
            data_df = pd.read_excel(xlsx_file)
            print("CSV file not found, reading Excel file...")
        else:
            raise FileNotFoundError(f"Neither {csv_file} nor {xlsx_file} exists")

        data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x), axis=1)
        dataset = Dataset.from_pandas(data_df)

        stamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        lora_output = f'models/{quantization}_{self.model_name}_lora_{stamp}'
        full_output = f'models/{quantization}_{self.model_name}_full_{stamp}'

        config = LoraConfig(
            r=lora_r or 16,
            lora_alpha=lora_alpha or 32,
            # target all the linear layers for full finetuning
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            lora_dropout=lora_dropout or 0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )

        # stabilize output layer and layernorms, then attach the adapters
        self.model = prepare_model_for_kbit_training(self.model)
        self.model = get_peft_model(self.model, config)

        training_args = transformers.TrainingArguments(
            per_device_train_batch_size=batch_size or 4,
            gradient_accumulation_steps=gradient_accumulation or 4,
            optim='paged_adamw_8bit',  # paged optimizer saves memory
            learning_rate=lr or 5e-6,
            fp16=True,
            logging_steps=10,
            num_train_epochs=epoch or 2,
            output_dir=lora_output,
            remove_unused_columns=True,
        )

        data_collator = transformers.DataCollatorForLanguageModeling(self.tokenizer, mlm=False)

        trainer = SFTTrainer(
            model=self.model,
            train_dataset=dataset,
            data_collator=data_collator,
            args=training_args,
            dataset_text_field="text",
        )

        trainer.train()
        trainer.save_model(lora_output)

        # Reload the base model in full precision and merge the LoRA weights.
        config = PeftConfig.from_pretrained(lora_output)
        model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
        model = PeftModel.from_pretrained(model, lora_output)

        merged_model = model.merge_and_unload()
        merged_model.save_pretrained(full_output)
        self.tokenizer.save_pretrained(full_output)
        print("*" * 10, ": Model is saved!!!")
|
| 112 |
+
|
| 113 |
+
class LlamaTrainer(BaseTrainer):
    """Causal-LM trainer that loads its base checkpoint with 4-bit NF4 quantization."""

    def load_model(self):
        """Return the quantized base model, prepared for gradient checkpointing."""
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        base = AutoModelForCausalLM.from_pretrained(
            self.base_model,
            quantization_config=quant_cfg,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
            # cache_dir=cache_folder
        )
        # Caching is incompatible with gradient checkpointing during training.
        base.config.use_cache = False
        base.config.pretraining_tp = 1
        base.gradient_checkpointing_enable()
        return base
|
| 133 |
+
|
| 134 |
+
class MistralTrainer(BaseTrainer):
    """Mistral trainer; reuses LlamaTrainer's quantized causal-LM loader."""

    def load_model(self):
        # The 4-bit loading recipe is identical across these causal LMs,
        # so delegate to LlamaTrainer's implementation.
        return LlamaTrainer.load_model(self)
|
| 137 |
+
|
| 138 |
+
class PhiTrainer(BaseTrainer):
    """Phi trainer; reuses LlamaTrainer's quantized causal-LM loader."""

    def load_model(self):
        # Same 4-bit loading recipe as the other causal LMs.
        return LlamaTrainer.load_model(self)
|
| 141 |
+
|
| 142 |
+
class ZephyrTrainer(BaseTrainer):
    """Zephyr trainer; reuses LlamaTrainer's quantized causal-LM loader."""

    def load_model(self):
        # Same 4-bit loading recipe as the other causal LMs.
        return LlamaTrainer.load_model(self)
|
| 145 |
+
|
| 146 |
+
class FlanT5Trainer(BaseTrainer):
    """Seq2seq (T5) variant: full-precision model with the T5-specific tokenizer."""

    def load_tokenizer(self):
        """T5 ships its own tokenizer class; right padding, no pad-token override."""
        tok = T5Tokenizer.from_pretrained(self.base_model)
        tok.padding_side = 'right'
        return tok

    def load_model(self):
        """Load the seq2seq model without quantization."""
        return T5ForConditionalGeneration.from_pretrained(self.base_model)

    def preprocess_function(self, examples):
        """Tokenize prefixed questions as inputs and answers as labels."""
        prompts = ["Please answer this question: " + question for question in examples["question"]]
        encoded = self.tokenizer(prompts, max_length=1024, truncation=True)
        targets = self.tokenizer(text_target=examples["answer"], max_length=1024, truncation=True)
        encoded["labels"] = targets["input_ids"]
        return encoded
|
| 163 |
+
|
| 164 |
+
def get_trainer(model_name):
    """Instantiate the trainer class registered for *model_name*.

    Args:
        model_name: one of "Llama", "Mistral", "Phi", "Zephyr", "Flant5".
    Returns:
        A constructed trainer bound to its default base checkpoint.
    Raises:
        ValueError: for an unknown model name. (Fix: the old code indexed the
        dict first, so a KeyError escaped before the validation check ran.)
    """
    model_map = {
        "Llama": ("NousResearch/Meta-Llama-3-8B", LlamaTrainer),
        "Mistral": ("unsloth/mistral-7b-instruct-v0.3", MistralTrainer),
        "Phi": ("microsoft/Phi-3-mini-4k-instruct", PhiTrainer),
        "Zephyr": ("HuggingFaceH4/zephyr-7b-beta", ZephyrTrainer),
        "Flant5": ("google/flan-t5-base", FlanT5Trainer),
    }

    if model_name not in model_map:
        raise ValueError(f"Unsupported model: {model_name}")

    base_model, trainer_class = model_map[model_name]
    return trainer_class(model_name, base_model)
|
fine_tune_file/phi_finetune.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %% Saved
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
from datasets import load_dataset, Dataset
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import transformers
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
| 8 |
+
from trl import SFTTrainer
|
| 9 |
+
import transformers
|
| 10 |
+
# from peft import AutoPeftModelForCausalLM
|
| 11 |
+
from transformers import GenerationConfig
|
| 12 |
+
from pynvml import *
|
| 13 |
+
import glob
|
| 14 |
+
class phi_trainer:
    """QLoRA fine-tuning helper for Microsoft Phi-3-mini."""

    def formatted_text(self, x, tokenizer):
        """Render one question/answer row into the tokenizer's chat format.

        Args:
            x: mapping with 'question' and 'answer' keys.
            tokenizer: tokenizer providing apply_chat_template.
        Returns:
            The untokenized chat-template string for the conversation.
        """
        temp = [
            # {"role": "system", "content": "Answer as a medical assistant. Respond concisely."},
            {"role": "user", "content": """You are a helpful chatbot, help users by answering their queries.
            Question: """ + x["question"]},
            {"role": "assistant", "content": x["answer"]}
        ]
        return tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)

    def phi_finetune(self, lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout):
        """Train LoRA adapters on a quantized Phi-3 model, then merge and save.

        Falsy hyperparameters fall back to defaults: lr 5e-06, epoch 2,
        batch_size 4, gradient_accumulation 4, lora_r 16, lora_alpha 32,
        lora_dropout 0.05. quantization == 4 selects 4-bit NF4, else 8-bit.
        """
        from datetime import datetime
        base_model = "microsoft/Phi-3-mini-4k-instruct"
        stamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        lora_output = f'models/{quantization}_Phi_lora_{stamp}'
        full_output = f'models/{quantization}_Phi_full_{stamp}'

        # Load the tokenizer once (the original loaded it twice).
        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        tokenizer.padding_side = 'right'
        tokenizer.pad_token = tokenizer.eos_token

        ### read file with question/answer pairs and render chat-format text
        data_location = r"data/finetune_data.xlsx"  ## replace here
        data_df = pd.read_excel(data_location)
        data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x, tokenizer), axis=1)
        print(data_df.iloc[0])
        dataset = Dataset.from_pandas(data_df)

        # Quantization config: 4-bit NF4 when requested, otherwise 8-bit.
        if quantization == 4:
            print("*" * 10, ": 4 bit quantization")
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
            )
        else:
            bnb_config = BitsAndBytesConfig(load_in_8bit=True)

        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            quantization_config=bnb_config,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
        )
        model.config.use_cache = False  # silence the warnings
        model.config.pretraining_tp = 1
        model.gradient_checkpointing_enable()

        # Set PEFT adapter config (16:32)
        from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

        config = LoraConfig(
            r=lora_r if lora_r else 16,
            lora_alpha=lora_alpha if lora_alpha else 32,
            # target all the linear layers for full finetuning
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            lora_dropout=lora_dropout if lora_dropout else 0.05,
            bias="none",
            task_type="CAUSAL_LM")

        # stabilize output layer and layernorms, then attach the adapters
        model = prepare_model_for_kbit_training(model)
        model = get_peft_model(model, config)

        training_config = transformers.TrainingArguments(
            per_device_train_batch_size=batch_size if batch_size else 4,
            gradient_accumulation_steps=gradient_accumulation if gradient_accumulation else 4,
            optim='paged_adamw_8bit',  # save memory
            learning_rate=lr if lr else 5e-06,
            fp16=True,  # consider compatibility when using bf16
            logging_steps=10,
            num_train_epochs=epoch if epoch else 2,
            output_dir=lora_output,
            remove_unused_columns=True,
        )

        data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

        trainer = SFTTrainer(
            model=model,
            train_dataset=dataset,
            data_collator=data_collator,
            args=training_config,
            dataset_text_field="text",
            # callbacks=[early_stop], need to learn, lora easily overfits
        )

        trainer.train()
        print("*" * 10, ": Finetune ended!!!!")
        trainer.save_model(lora_output)

        # Reload the base model in full precision and merge the LoRA weights.
        from peft import PeftConfig, PeftModel
        config = PeftConfig.from_pretrained(lora_output)
        model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, trust_remote_code=True)
        model = PeftModel.from_pretrained(model, lora_output)
        tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        merged_model = model.merge_and_unload()

        merged_model.save_pretrained(full_output)
        tokenizer.save_pretrained(full_output)
        print("*" * 10, ": Model is saved!!!")
|
fine_tune_file/zepyhr_finetune.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#code changed
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
from datasets import load_dataset, Dataset
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import transformers
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
| 8 |
+
from trl import SFTTrainer
|
| 9 |
+
import transformers
|
| 10 |
+
# from peft import AutoPeftModelForCausalLM
|
| 11 |
+
from transformers import GenerationConfig
|
| 12 |
+
from pynvml import *
|
| 13 |
+
import glob
|
| 14 |
+
class zephyr_trainer:
    """QLoRA fine-tuning helper for the Zephyr-7b-beta chat model."""

    def formatted_text(self, x, tokenizer):
        """Render one question/answer row into the tokenizer's chat format.

        Args:
            x: mapping with 'question' and 'answer' keys.
            tokenizer: tokenizer providing apply_chat_template.
        Returns:
            The untokenized chat-template string for the conversation.
        """
        temp = [
            # {"role": "system", "content": "Answer as a medical assistant. Respond concisely."},
            {"role": "user", "content": """You are a helpful chatbot, help users by answering their queries.
            Question: """ + x["question"]},
            {"role": "assistant", "content": x["answer"]}
        ]
        return tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)

    def zepyhr_model(self, lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout):
        """Train LoRA adapters on a quantized Zephyr model, then merge and save.

        Falsy hyperparameters fall back to defaults: lr 5e-06, epoch 2,
        batch_size 4, gradient_accumulation 4, lora_r 16, lora_alpha 32,
        lora_dropout 0.05. quantization == 4 selects 4-bit NF4, else 8-bit.
        """
        from datetime import datetime
        base_model = 'HuggingFaceH4/zephyr-7b-beta'
        stamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        # NOTE: "Zepyhr" spelling kept so output paths stay consistent with prior runs.
        lora_output = f'models/{quantization}_Zepyhr_lora_{stamp}'
        full_output = f'models/{quantization}_Zepyhr_full_{stamp}'

        # Load the tokenizer once (the original loaded it twice) and removed a
        # no-op tuple expression (`tokenizer.add_bos_token, tokenizer.add_eos_token`).
        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        tokenizer.padding_side = 'right'
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.add_eos_token = True

        ### read file with question/answer pairs and render chat-format text
        data_location = r"data/finetune_data.xlsx"  ## replace here
        data_df = pd.read_excel(data_location)
        data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x, tokenizer), axis=1)
        print(data_df.iloc[0])
        dataset = Dataset.from_pandas(data_df)

        # Quantization config: 4-bit NF4 when requested, otherwise 8-bit.
        if quantization == 4:
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
            )
        else:
            bnb_config = BitsAndBytesConfig(load_in_8bit=True)

        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            quantization_config=bnb_config,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
        )
        model.config.use_cache = False  # silence the warnings
        model.config.pretraining_tp = 1
        model.gradient_checkpointing_enable()

        # Set PEFT adapter config (16:32)
        from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

        config = LoraConfig(
            r=lora_r if lora_r else 16,
            lora_alpha=lora_alpha if lora_alpha else 32,
            # target all the linear layers for full finetuning
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            lora_dropout=lora_dropout if lora_dropout else 0.05,
            bias="none",
            task_type="CAUSAL_LM")

        # stabilize output layer and layernorms, then attach the adapters
        model = prepare_model_for_kbit_training(model)
        model = get_peft_model(model, config)

        training_config = transformers.TrainingArguments(
            per_device_train_batch_size=batch_size if batch_size else 4,
            gradient_accumulation_steps=gradient_accumulation if gradient_accumulation else 4,
            optim='paged_adamw_8bit',  # save memory
            learning_rate=lr if lr else 5e-06,
            fp16=True,  # consider compatibility when using bf16
            logging_steps=10,
            num_train_epochs=epoch if epoch else 2,
            output_dir=lora_output,
            remove_unused_columns=True,
        )

        data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

        trainer = SFTTrainer(
            model=model,
            train_dataset=dataset,
            data_collator=data_collator,
            args=training_config,
            dataset_text_field="text",
            # callbacks=[early_stop], need to learn, lora easily overfits
        )

        trainer.train()
        print("*" * 10, ": Finetune ended!!!!")
        trainer.save_model(lora_output)

        # Reload the base model in full precision and merge the LoRA weights.
        from peft import PeftConfig, PeftModel
        config = PeftConfig.from_pretrained(lora_output)
        model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
        model = PeftModel.from_pretrained(model, lora_output)
        tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        merged_model = model.merge_and_unload()

        merged_model.save_pretrained(full_output)
        tokenizer.save_pretrained(full_output)
        print("*" * 10, ": Model is saved!!!")
|
inference.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import transformers
|
| 5 |
+
from pynvml import *
|
| 6 |
+
import torch
|
| 7 |
+
from langchain import hub
|
| 8 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 9 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 10 |
+
from model_ret import load_model_and_pipeline
|
| 11 |
+
from create_retriever import retriever_chroma
|
| 12 |
+
|
| 13 |
+
# Model chain class
|
| 14 |
+
class model_chain:
    """RAG question-answering chain around a local finetuned model.

    Prefers a locally finetuned checkpoint under ``models/<name>`` when one
    exists and is non-empty; otherwise falls back to the online base model.
    Wires retriever -> prompt -> LLM -> parser into a single LangChain chain.
    """

    # Resolved model identifier (class-level default, set per instance in __init__).
    model_name = ""

    def __init__(self,
                 model_name_local,
                 model_name_online="Llama",
                 use_online=True,
                 embedding_name="BAAI/bge-base-en-v1.5",
                 splitter_type_dropdown="character",
                 chunk_size_slider=512,
                 chunk_overlap_slider=30,
                 separator_textbox="\n",
                 max_tokens_slider=2048) -> None:
        # Use the local finetuned checkpoint only when its directory exists
        # and actually contains files.
        local_dir = f"models/{model_name_local}"
        if os.path.exists(local_dir) and len(os.listdir(local_dir)):
            import gradio as gr
            # FIX: original message was an un-filled placeholder
            # ("Model *()* from online!!") and wrongly said "online"
            # in the local branch.
            gr.Info(f"Using local finetuned model '{model_name_local}'!")
            self.model_name = model_name_local
        else:
            self.model_name = model_name_online

        self.tokenizer, self.model, self.llm = load_model_and_pipeline(self.model_name)

        # FIX: chunk_size_slider was passed twice, silently discarding the
        # user's chunk-overlap setting; pass chunk_overlap_slider as intended.
        self.retriever = retriever_chroma(False, embedding_name, splitter_type_dropdown,
                                          chunk_size_slider, chunk_overlap_slider,
                                          separator_textbox, max_tokens_slider)

        # RAG chain: retrieved docs are flattened into the prompt's context slot.
        prompt = hub.pull("rlm/rag-prompt")
        self.rag_chain = (
            {"context": self.retriever | self.format_docs, "question": RunnablePassthrough()}
            | prompt
            | self.llm
            | StrOutputParser()
        )

    def format_docs(self, docs):
        """Join retrieved documents' text with blank lines for the prompt context."""
        return "\n\n".join(doc.page_content for doc in docs)

    def rag_chain_ret(self):
        """Return the composed RAG chain."""
        return self.rag_chain

    def ans_ret(self, inp):
        """Answer question ``inp``.

        Flan-T5 (a seq2seq model) is prompted manually with the top retrieved
        contexts; every other model goes through the RAG chain.
        """
        if self.model_name == 'Flant5':
            data = self.retriever.invoke(inp)
            context = ""
            for doc in data[:2]:
                context += doc.page_content + "\n"
            # FIX: the question was hard-coded to "What is KUET?" so every
            # call answered the same question; use the caller's input.
            prompt_text = f"""Please answer to this question using this context:\n{context}\n{inp}"""
            inputs = self.tokenizer(prompt_text, return_tensors="pt")
            outputs = self.model.generate(**inputs)
            answer = self.tokenizer.decode(outputs[0])
            from textwrap import fill
            return fill(answer, width=100)

        ans = self.rag_chain.invoke(inp)
        # FIX: guard against a missing "Answer:" marker (the original
        # ans.split("Answer:")[1] raised IndexError in that case).
        _, marker, tail = ans.partition("Answer:")
        return tail if marker else ans
|
model_ret.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
from time import time
|
| 4 |
+
from torch import cuda, bfloat16
|
| 5 |
+
from langchain import HuggingFacePipeline
|
| 6 |
+
from transformers import (AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, AutoConfig,
|
| 7 |
+
T5ForConditionalGeneration, pipeline, BitsAndBytesConfig)
|
| 8 |
+
|
| 9 |
+
def load_model_and_pipeline(model_id, temperature=0):
    """Load a 4-bit quantized causal LM and wrap it in a LangChain pipeline.

    Args:
        model_id: Hugging Face hub id or local path of the causal LM.
        temperature: sampling temperature forwarded to the pipeline kwargs.

    Returns:
        Tuple ``(tokenizer, model, llm)`` where ``llm`` is a
        ``HuggingFacePipeline`` wrapping a text-generation pipeline.
    """
    # NF4 with double quantization keeps VRAM usage low while computing
    # activations in bfloat16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=bfloat16
    )

    time_1 = time()
    model_config = AutoConfig.from_pretrained(
        model_id,
    )

    # FIX: the tokenizer was loaded twice (before and after the model);
    # load it exactly once.
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        config=model_config,
        quantization_config=bnb_config,
        device_map='auto',  # accelerate decides device placement
    )
    time_2 = time()
    print(f"Prepare model, tokenizer: {round(time_2-time_1, 3)} sec.")
    time_1 = time()

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        dtype=torch.float16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id)
    time_2 = time()
    print(f"Prepare pipeline: {round(time_2-time_1, 3)} sec.")

    llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': temperature})
    return tokenizer, model, llm
|
| 54 |
+
|
| 55 |
+
import pandas as pd
|
| 56 |
+
from datasets import Dataset
|
| 57 |
+
|
| 58 |
+
def calculate_rag_metrics(model_ques_ans_gen, llm_model, embedding_model="BAAI/bge-base-en-v1.5"):
    """Score generated question/answer pairs with the ragas metric suite.

    Args:
        model_ques_ans_gen: list of dicts with 'question', 'answer' and
            'ground_truths' keys. Contexts are filled with an empty-string
            placeholder, matching the original evaluation setup.
        llm_model: LLM used by ragas as the judge model.
        embedding_model: embeddings passed through to ``ragas.evaluate``.

    Returns:
        The evaluation result as a pandas DataFrame.
    """
    from ragas import evaluate
    from ragas.metrics import faithfulness, answer_correctness, answer_similarity, answer_relevancy, context_recall, context_precision

    # Reshape the list of row-dicts into the column-oriented layout ragas expects.
    samples = {
        'question': [row['question'] for row in model_ques_ans_gen],
        'answer': [row['answer'] for row in model_ques_ans_gen],
        'contexts': [[''] for _ in model_ques_ans_gen],  # placeholder context
        'reference': [row['ground_truths'] for row in model_ques_ans_gen],
    }

    # pandas DataFrame -> HuggingFace Dataset, the input type ragas consumes.
    eval_dataset = Dataset.from_pandas(pd.DataFrame(samples))

    metric_suite = [
        answer_correctness, answer_similarity,
        answer_relevancy, faithfulness,
        context_recall, context_precision
    ]

    # Run the evaluation with the supplied judge LLM and embedding model.
    scores = evaluate(
        eval_dataset,
        metrics=metric_suite,
        llm=llm_model,
        embeddings=embedding_model
    )
    return scores.to_pandas()
|
model_ret.py.old
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 2 |
+
from langchain import HuggingFacePipeline
|
| 3 |
+
from transformers import pipeline
|
| 4 |
+
import transformers
|
| 5 |
+
import torch
|
| 6 |
+
from transformers import T5Tokenizer
|
| 7 |
+
from transformers import T5ForConditionalGeneration
|
| 8 |
+
def zepyhr_model(model_info, quanization):
    """Load a finetuned Zephyr model from ``models/<model_info>``.

    ``quanization`` (sic) selects 8-bit when equal to "8", otherwise 4-bit.
    Returns a ``HuggingFacePipeline`` wrapping a text-generation pipeline.
    """
    model_path = f"models/{model_info}"
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_auth_token=True)

    # Shared load options; only the quantization flag differs per branch.
    load_kwargs = dict(
        device_map='auto',
        torch_dtype=torch.float16,
        use_auth_token=True,
    )
    if quanization == "8":
        load_kwargs['load_in_8bit'] = True
    else:
        load_kwargs['load_in_4bit'] = True
    model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    return HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
|
| 42 |
+
def llama_model(model_info, quanization):
    """Load a finetuned Llama model from ``models/<model_info>``.

    ``quanization`` (sic) selects 8-bit when equal to "8", otherwise 4-bit.
    Returns a ``HuggingFacePipeline`` wrapping a text-generation pipeline.
    """
    model_path = f"models/{model_info}"
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_auth_token=True)

    # Shared load options; only the quantization flag differs per branch.
    load_kwargs = dict(
        device_map='auto',
        torch_dtype=torch.float16,
        use_auth_token=True,
    )
    if quanization == "8":
        load_kwargs['load_in_8bit'] = True
    else:
        load_kwargs['load_in_4bit'] = True
    model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    return HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
|
| 74 |
+
|
| 75 |
+
def mistral_model(model_info, quanization):
    """Load a finetuned Mistral model from ``models/<model_info>``.

    ``quanization`` (sic) selects 8-bit when equal to "8", otherwise 4-bit.
    Returns a ``HuggingFacePipeline`` wrapping a text-generation pipeline.
    """
    model_path = f"models/{model_info}"
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_auth_token=True)

    # Shared load options; only the quantization flag differs per branch.
    load_kwargs = dict(
        device_map='auto',
        torch_dtype=torch.float16,
        use_auth_token=True,
    )
    if quanization == "8":
        load_kwargs['load_in_8bit'] = True
    else:
        load_kwargs['load_in_4bit'] = True
    model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    return HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
|
| 111 |
+
|
| 112 |
+
def phi_model(model_info, quanization):
    """Load a finetuned Phi model from ``models/<model_info>``.

    ``quanization`` (sic) selects 8-bit when equal to "8", otherwise 4-bit.
    Phi requires ``trust_remote_code=True`` in both branches.
    Returns a ``HuggingFacePipeline`` wrapping a text-generation pipeline.
    """
    model_path = f"models/{model_info}"
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_auth_token=True)

    # Shared load options; only the quantization flag differs per branch.
    load_kwargs = dict(
        device_map='auto',
        torch_dtype=torch.float16,
        use_auth_token=True,
        trust_remote_code=True,
    )
    if quanization == "8":
        load_kwargs['load_in_8bit'] = True
    else:
        load_kwargs['load_in_4bit'] = True
    model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    return HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
|
| 150 |
+
|
| 151 |
+
def flant5_model(model_info):
    """Load a finetuned Flan-T5 model from ``models/<model_info>``.

    Returns:
        Tuple ``(tokenizer, model, llm)`` where ``llm`` is a
        ``HuggingFacePipeline`` wrapping a text2text-generation pipeline.
    """
    path = f"models/{model_info}"
    model = T5ForConditionalGeneration.from_pretrained(path)
    # NOTE(review): the tokenizer is loaded from the base checkpoint, not
    # from `path` — assumes the finetune did not change the vocabulary;
    # confirm against the finetuning script.
    MODEL_NAME = "google/flan-t5-base"
    tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
    # FIX: "text-generation" is the causal-LM task and does not work with
    # T5ForConditionalGeneration (an encoder-decoder); the correct pipeline
    # task for T5 is "text2text-generation".
    pipe = pipeline("text2text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    torch_dtype=torch.bfloat16,
                    device_map="auto",
                    max_new_tokens=512,
                    do_sample=True,
                    top_k=30,
                    num_return_sequences=1,
                    eos_token_id=tokenizer.eos_token_id
                    )

    llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
    return tokenizer, model, llm
|
| 170 |
+
|
| 171 |
+
# model = AutoModelForCausalLM.from_pretrained(
|
| 172 |
+
# path,
|
| 173 |
+
# # quantization_config=bnb_config,
|
| 174 |
+
# device_map="auto",
|
| 175 |
+
# trust_remote_code=True,
|
| 176 |
+
# attn_implementation="flash_attention_2",
|
| 177 |
+
# torch_dtype=torch.bfloat16,
|
| 178 |
+
|
| 179 |
+
# )
|
models/.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ignore all files except .gitignore :
|
| 2 |
+
*
|
| 3 |
+
!.gitignore
|
params.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "M1", "embedding_name": "BAAI/bge-base-en-v1.5", "splitter_type_dropdown": "character", "chunk_size_slider": 500, "chunk_overlap_slider": 30, "separator_textbox": "\n", "max_tokens_slider": 1000}
|
requirements.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.44
|
| 2 |
+
langchain==0.3.27
|
| 3 |
+
langchain-community==0.3.31
|
| 4 |
+
llama-index
|
| 5 |
+
llama-index-core
|
| 6 |
+
fastapi==0.112.4
|
| 7 |
+
transformers==4.57.0
|
| 8 |
+
pynvml==12.0.0
|
| 9 |
+
datasets==4.0.0
|
| 10 |
+
openpyxl==3.1.5
|
| 11 |
+
trl==0.25.1
|
| 12 |
+
peft==0.17.1
|
| 13 |
+
bitsandbytes==0.48.2
|
| 14 |
+
docx2txt==0.9
|
| 15 |
+
torch
|
| 16 |
+
torchvision
|
| 17 |
+
torchaudio
|
| 18 |
+
jupyter==1.1.1
|
| 19 |
+
langchainhub==0.1.21
|
| 20 |
+
sentence-transformers==5.1.1
|
| 21 |
+
faiss-gpu-cu12==1.13.0
|
| 22 |
+
accelerate==1.10.1
|
| 23 |
+
ninja==1.13.0
|
| 24 |
+
wandb==0.22.2
|
| 25 |
+
docx==0.2.4
|
| 26 |
+
chromadb==1.3.5
|
| 27 |
+
pypdf==6.4.0
|
| 28 |
+
ragas==0.3.9
|
| 29 |
+
# flash-attn --no-build-isolation
|
score_report/2024_03_25_21_10_54model_ans_mistral_finetuned_486_colbert.xlsx
ADDED
|
Binary file (21.9 kB). View file
|
|
|
score_report/2024_03_25_22_09_44model_ans_zepyhr_finetuned_486_colbert.xlsx
ADDED
|
Binary file (18.4 kB). View file
|
|
|
score_report/2024_03_26_22_42_56model_ans_llama_finetuned_486_rag_colbert.xlsx
ADDED
|
Binary file (13.2 kB). View file
|
|
|
test1.txt
ADDED
|
File without changes
|
testing.ipynb
ADDED
|
@@ -0,0 +1,712 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"def score_report_bar():\n",
|
| 10 |
+
" path=r\"C:\\Users\\Inception\\Desktop\\LLM-based-QA-chatbot-builder\\UI\\score_report\"\n",
|
| 11 |
+
" import os\n",
|
| 12 |
+
" import math\n",
|
| 13 |
+
" dat=[]\n",
|
| 14 |
+
" for x in os.listdir(path):\n",
|
| 15 |
+
" wh=[]\n",
|
| 16 |
+
" flag=0\n",
|
| 17 |
+
" for x2 in x:\n",
|
| 18 |
+
" if x2>='a' and x2<='z':\n",
|
| 19 |
+
" flag=1\n",
|
| 20 |
+
" wh.append(x2)\n",
|
| 21 |
+
" elif flag==1:\n",
|
| 22 |
+
" wh.append(\" \")\n",
|
| 23 |
+
" wh=''.join(wh)\n",
|
| 24 |
+
" wh=wh.replace(\"model ans\",\"\")\n",
|
| 25 |
+
" wh=wh.replace(\"finetuned\",\"\")\n",
|
| 26 |
+
" wh=wh.replace(\" \",\" \")\n",
|
| 27 |
+
" wh=wh.replace(\"xlsx\",\"\")\n",
|
| 28 |
+
" df_temp=pd.read_excel(os.path.join(path,x))\n",
|
| 29 |
+
" rating=sum(df_temp[\"rating\"])/len(df_temp)\n",
|
| 30 |
+
" dat.append({\n",
|
| 31 |
+
" \"Model Name\":wh,\n",
|
| 32 |
+
" \"Average Rating\":rating\n",
|
| 33 |
+
" })\n",
|
| 34 |
+
" temp=pd.DataFrame(dat)\n",
|
| 35 |
+
" return temp"
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"cell_type": "code",
|
| 40 |
+
"execution_count": null,
|
| 41 |
+
"metadata": {},
|
| 42 |
+
"outputs": [],
|
| 43 |
+
"source": [
|
| 44 |
+
"import gradio as gr\n",
|
| 45 |
+
"import pandas as pd\n",
|
| 46 |
+
"\n",
|
| 47 |
+
"def bar_plot_fn():\n",
|
| 48 |
+
" temp=score_report_bar()\n",
|
| 49 |
+
" return gr.BarPlot(\n",
|
| 50 |
+
" temp,\n",
|
| 51 |
+
" x=\"Model Name\",\n",
|
| 52 |
+
" y=\"Average Rating\",\n",
|
| 53 |
+
" x_title=\"Model name\",\n",
|
| 54 |
+
" y_title=\"Average Rating\",\n",
|
| 55 |
+
" title=\"Simple Bar Plot with made up data\",\n",
|
| 56 |
+
" tooltip=[\"Model Name\", \"Average Rating\"],\n",
|
| 57 |
+
" y_lim=[1, 5],\n",
|
| 58 |
+
" width=200,\n",
|
| 59 |
+
" height=1000\n",
|
| 60 |
+
" )\n",
|
| 61 |
+
"with gr.Blocks() as bar_plot:\n",
|
| 62 |
+
" with gr.Row():\n",
|
| 63 |
+
" btn=gr.Button(\"test\")\n",
|
| 64 |
+
" with gr.Row():\n",
|
| 65 |
+
" plot = gr.BarPlot()\n",
|
| 66 |
+
" btn.click(bar_plot_fn, None, outputs=plot)\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"bar_plot.launch()\t"
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"cell_type": "code",
|
| 73 |
+
"execution_count": null,
|
| 74 |
+
"metadata": {},
|
| 75 |
+
"outputs": [],
|
| 76 |
+
"source": [
|
| 77 |
+
"def parse_data(link,num=None): \n",
|
| 78 |
+
" from bs4 import BeautifulSoup\n",
|
| 79 |
+
" import requests\n",
|
| 80 |
+
" import re\n",
|
| 81 |
+
" from docx import Document \n",
|
| 82 |
+
" from langchain_community.document_loaders import WebBaseLoader\n",
|
| 83 |
+
" s=set()\n",
|
| 84 |
+
" import time\n",
|
| 85 |
+
" start_time = time.time()\n",
|
| 86 |
+
" duration = 5\n",
|
| 87 |
+
" def get_links(url):\n",
|
| 88 |
+
" response = requests.get(url)\n",
|
| 89 |
+
" data = response.text\n",
|
| 90 |
+
" soup = BeautifulSoup(data, 'lxml')\n",
|
| 91 |
+
"\n",
|
| 92 |
+
" links = []\n",
|
| 93 |
+
" for link in soup.find_all('a'):\n",
|
| 94 |
+
" link_url = link.get('href')\n",
|
| 95 |
+
" if link_url is not None and link_url.startswith('http'):\n",
|
| 96 |
+
" s.add(link_url)\n",
|
| 97 |
+
" links.append(link_url)\n",
|
| 98 |
+
" \n",
|
| 99 |
+
" return links\n",
|
| 100 |
+
" # def write_to_file(links):\n",
|
| 101 |
+
" # with open('data.txt', 'a') as f:\n",
|
| 102 |
+
" # f.writelines(links)\n",
|
| 103 |
+
" def get_all_links(url):\n",
|
| 104 |
+
" for link in get_links(url):\n",
|
| 105 |
+
" if (time.time() - start_time) >= duration:\n",
|
| 106 |
+
" return\n",
|
| 107 |
+
" get_all_links(link)\n",
|
| 108 |
+
"\n",
|
| 109 |
+
" def data_ret2(link):\n",
|
| 110 |
+
" loader = WebBaseLoader(f\"{link}\")\n",
|
| 111 |
+
" data = loader.load()\n",
|
| 112 |
+
" return data[0].page_content\n",
|
| 113 |
+
" # link = 'https://kuet.ac.bd'\n",
|
| 114 |
+
" s.add(link)\n",
|
| 115 |
+
" get_all_links(link)\n",
|
| 116 |
+
" li=list(s)\n",
|
| 117 |
+
" all_data=[]\n",
|
| 118 |
+
" if num==None:\n",
|
| 119 |
+
" num=len(li)\n",
|
| 120 |
+
" for idx,x in enumerate(li):\n",
|
| 121 |
+
" if idx==num:\n",
|
| 122 |
+
" break\n",
|
| 123 |
+
" try:\n",
|
| 124 |
+
" print(\"Link: \",x)\n",
|
| 125 |
+
" all_data.append(data_ret2(x))\n",
|
| 126 |
+
" except:\n",
|
| 127 |
+
" print(\"pass\")\n",
|
| 128 |
+
" continue\n",
|
| 129 |
+
" all_data2 = re.sub(r'\\n+', '\\n\\n', \"\\n\".join(all_data))\n",
|
| 130 |
+
" document = Document()\n",
|
| 131 |
+
" document.add_paragraph(all_data2)\n",
|
| 132 |
+
" document.save('docx_file.docx')"
|
| 133 |
+
]
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"cell_type": "code",
|
| 137 |
+
"execution_count": 3,
|
| 138 |
+
"metadata": {},
|
| 139 |
+
"outputs": [
|
| 140 |
+
{
|
| 141 |
+
"name": "stdout",
|
| 142 |
+
"output_type": "stream",
|
| 143 |
+
"text": [
|
| 144 |
+
"Link: http://library.kuet.ac.bd/\n",
|
| 145 |
+
"Link: http://kuet.portal.gov.bd/site/page/84728d9d-6059-41c4-940c-0f75eacf7d4c/Quarterly--semiannual-monitoring--evaluation-reports\n",
|
| 146 |
+
"Link: https://kuet.ac.bd/index.php/welcome/shownews/943\n"
|
| 147 |
+
]
|
| 148 |
+
}
|
| 149 |
+
],
|
| 150 |
+
"source": [
|
| 151 |
+
"parse_data(\"https://kuet.ac.bd\")"
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"cell_type": "code",
|
| 156 |
+
"execution_count": null,
|
| 157 |
+
"metadata": {},
|
| 158 |
+
"outputs": [],
|
| 159 |
+
"source": [
|
| 160 |
+
"import os\n",
|
| 161 |
+
"os.getcwd()"
|
| 162 |
+
]
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"cell_type": "code",
|
| 166 |
+
"execution_count": null,
|
| 167 |
+
"metadata": {},
|
| 168 |
+
"outputs": [],
|
| 169 |
+
"source": []
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"cell_type": "code",
|
| 173 |
+
"execution_count": null,
|
| 174 |
+
"metadata": {},
|
| 175 |
+
"outputs": [],
|
| 176 |
+
"source": [
|
| 177 |
+
"df_all=[]\n",
|
| 178 |
+
"for x in os.listdir(\"save_ques_ans\"):\n",
|
| 179 |
+
" path=os.path.join(\"save_ques_ans\",x)\n",
|
| 180 |
+
" df_all.append(pd.read_excel(path))\n",
|
| 181 |
+
"df=pd.concat(df_all,axis=0)"
|
| 182 |
+
]
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"cell_type": "code",
|
| 186 |
+
"execution_count": null,
|
| 187 |
+
"metadata": {},
|
| 188 |
+
"outputs": [],
|
| 189 |
+
"source": []
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"cell_type": "code",
|
| 193 |
+
"execution_count": null,
|
| 194 |
+
"metadata": {},
|
| 195 |
+
"outputs": [],
|
| 196 |
+
"source": [
|
| 197 |
+
"df"
|
| 198 |
+
]
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"cell_type": "code",
|
| 202 |
+
"execution_count": null,
|
| 203 |
+
"metadata": {},
|
| 204 |
+
"outputs": [],
|
| 205 |
+
"source": []
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"cell_type": "code",
|
| 209 |
+
"execution_count": null,
|
| 210 |
+
"metadata": {},
|
| 211 |
+
"outputs": [],
|
| 212 |
+
"source": [
|
| 213 |
+
"\n",
|
| 214 |
+
"doc=[]\n",
|
| 215 |
+
"for x in s:\n",
|
| 216 |
+
" print(x)\n",
|
| 217 |
+
" doc.extend(data_ret2(x))\n",
|
| 218 |
+
" "
|
| 219 |
+
]
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"cell_type": "code",
|
| 223 |
+
"execution_count": null,
|
| 224 |
+
"metadata": {},
|
| 225 |
+
"outputs": [],
|
| 226 |
+
"source": [
|
| 227 |
+
"doc"
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"cell_type": "code",
|
| 232 |
+
"execution_count": null,
|
| 233 |
+
"metadata": {},
|
| 234 |
+
"outputs": [],
|
| 235 |
+
"source": []
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"cell_type": "code",
|
| 239 |
+
"execution_count": null,
|
| 240 |
+
"metadata": {},
|
| 241 |
+
"outputs": [],
|
| 242 |
+
"source": [
|
| 243 |
+
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
|
| 244 |
+
"if model_name==\"Mistral\":\n",
|
| 245 |
+
" path=\"models/full_KUET_LLM_mistral\"\n",
|
| 246 |
+
"elif model_name==\"Zepyhr\":\n",
|
| 247 |
+
" path=\"models/full_KUET_LLM_zepyhr\"\n",
|
| 248 |
+
"elif model_name==\"Llama2\":\n",
|
| 249 |
+
" path=\"models/full_KUET_LLM_llama\" \n",
|
| 250 |
+
"tokenizer = AutoTokenizer.from_pretrained(path)\n",
|
| 251 |
+
"model = AutoModelForCausalLM.from_pretrained(path,\n",
|
| 252 |
+
" device_map='auto',\n",
|
| 253 |
+
" torch_dtype=torch.float16,\n",
|
| 254 |
+
" use_auth_token=True,\n",
|
| 255 |
+
" load_in_8bit=True,\n",
|
| 256 |
+
" # load_in_4bit=True\n",
|
| 257 |
+
" )\n",
|
| 258 |
+
"model.push_to_hub(repo_id=f\"My_model_{model_name}\",token=hf)\n",
|
| 259 |
+
"tokenizer.push_to_hub(repo_id=f\"My_model_{model_name}\",token=hf)"
|
| 260 |
+
]
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
"cell_type": "code",
|
| 264 |
+
"execution_count": 1,
|
| 265 |
+
"metadata": {},
|
| 266 |
+
"outputs": [
|
| 267 |
+
{
|
| 268 |
+
"data": {
|
| 269 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAIjCAYAAADvBuGTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABvPElEQVR4nO3de5zM9eLH8ffM2It1C8uuW+5ZtwjR6qKydkUuXdxzq5/SSdSeKA7WppLOIZWQDlIS6bCni1bbhi42jlsodFMK65Lsuu6une/vD2Yy9s7sfr8zXs/HYx+n+c53Z97fmTG8z+fz/XxthmEYAgAAAABcFrvZAQAAAADAH1CuAAAAAMALKFcAAAAA4AWUKwAAAADwAsoVAAAAAHgB5QoAAAAAvIByBQAAAABeQLkCAAAAAC+gXAEAAACAF1CuAABFYrPZNGnSJFOee82aNbLZbFqzZo0pzw8AQH4oVwDgg9544w3ZbLY8f77++muzI16WWbNm6Y033jA7hodbb71VzZo1MztGoWRnZ2vBggW69dZbValSJQUFBalOnToaOnSoNm7caHY8APBbpcwOAAC4dE8//bTq1q2bY3uDBg1MSOM9s2bNUmhoqIYMGeKx/ZZbbtHp06cVGBhoTjAfcPr0ad19991KTEzULbfconHjxqlSpUr65Zdf9O6772rhwoXau3evatasaXZUAPA7lCsA8GF33HGH2rRpY3aMEmO32xUcHGx2DEsbPXq0EhMT9eKLL+qxxx7zuC8uLk4vvviiV57H6XQqMzOT9wMALsC0QADwU1lZWapUqZKGDh2a47709HQFBwfriSeekCRlZmZq4sSJat26tSpUqKAyZcro5ptv1urVqwt8niFDhqhOnTo5tk+aNEk2m81j24IFC3T77beratWqCgoKUpMmTTR79myPferUqaNvv/1Wa9eudU9zvPXWWyXlfc7VsmXL1Lp1a5UuXVqhoaG67777tG/fvhw5y5Ytq3379qlnz54qW7asqlSpoieeeELZ2dkFHmdhzZo1S02bNlVQUJCqV6+uRx55RMeOHfPY54cfftA999yj8PBwBQcHq2bNmurbt6/S0tLc+yQlJemmm27SVVddpbJly6pRo0YaN25cvs/9+++/67XXXlOnTp1yFCtJcjgceuKJJ9yjVkV572w2m0aMGKG3337bfXwffPBBoT9jkpSRkaG4uDg1aNBAQUFBqlWrlsaMGaOMjIx8jwsAfAUjVwDgw9LS0nTkyBGPbTabTZUrV1ZAQIDuuusuLV++XK+99prHVLqEhARlZGSob9++ks79Q/jf//63+vXrp2HDhun48eOaN2+eYmJitGHDBrVs2dIreWfPnq2mTZuqe/fuKlWqlD744AP97W9/k9Pp1COPPCJJmjFjhh599FGVLVtW//jHPyRJYWFheT7mG2+8oaFDh+r666/XlClTdPDgQb300kv66quvtGXLFl111VXufbOzsxUTE6N27drpX//6lz799FNNmzZN9evX18MPP3zZxzdp0iTFx8crKipKDz/8sHbv3q3Zs2frf//7n7766isFBAQoMzNTMTExysjI0KOPPqrw8HDt27dPH374oY4dO6YKFSro22+/1Z133qlrr71WTz/9tIKCgvTjjz/qq6++yvf5P/74Y509e1YDBw687GPJzWeffaZ3331XI0aMUGhoqBo2bFjoz5jT6VT37t315Zdf6sEHH1Tjxo21fft2vfjii/r++++VkJBQLJkBoEQZAACfs2DBAkNSrj9BQUHu/VatWmVIMj744AOP3+/SpYtRr1499+2zZ88aGRkZHvv8+eefRlhYmHH//fd7bJdkxMXFuW8PHjzYqF27do6McXFxxsV/zZw6dSrHfjExMR5ZDMMwmjZtanTo0CHHvqtXrzYkGatXrzYMwzAyMzONqlWrGs2aNTNOnz7t3u/DDz80JBkTJ070yCnJePrppz0e87rrrjNat26d47ku1q
FDB6Np06Z53n/o0CEjMDDQiI6ONrKzs93bZ86caUgy5s+fbxiGYWzZssWQZCxbtizPx3rxxRcNScbhw4cLzHWhxx9/3JBkbNmypVD7F+W9k2TY7Xbj22+/9dhe2M/YW2+9ZdjtduOLL77w2G/OnDmGJOOrr74qVGYAsDKmBQKAD3v11VeVlJTk8fPxxx+777/99tsVGhqqpUuXurf9+eefSkpKUp8+fdzbHA6He9TB6XTq6NGjOnv2rNq0aaPNmzd7LW/p0qXd/+0adevQoYN+/vlnjylxhbVx40YdOnRIf/vb3zzO/enatasiIiL00Ucf5fid4cOHe9y++eab9fPPPxf5uS/26aefKjMzU4899pjs9r/+eh02bJjKly/vzlKhQgVJ0qpVq3Tq1KlcH8s12vbf//5XTqez0BnS09MlSeXKlbuUQyhQhw4d1KRJE49thf2MLVu2TI0bN1ZERISOHDni/rn99tslqVBTUAHA6pgWCAA+rG3btvkuaFGqVCndc889Wrx4sTIyMhQUFKTly5crKyvL4x++krRw4UJNmzZNu3btUlZWlnt7bqsRXqqvvvpKcXFxSklJyVEs0tLS3MWjsH799VdJUqNGjXLcFxERoS+//NJjW3BwsKpUqeKxrWLFivrzzz+L9LxFyRIYGKh69eq5769bt65iY2M1ffp0vf3227r55pvVvXt33Xfffe7j79Onj/7973/r//7v//TUU0+pY8eOuvvuu3Xvvfd6FLeLlS9fXpJ0/Pjxyz6e3OT2WSjsZ+yHH37Qzp07c7z+LocOHSqWzABQkhi5AgA/17dvXx0/ftw9ovXuu+8qIiJCLVq0cO+zaNEiDRkyRPXr19e8efOUmJiopKQk3X777QWOnFy88IHLxYtE/PTTT+rYsaOOHDmi6dOn66OPPlJSUpIef/xxSSrSCM2lcjgcxf4chTFt2jRt27ZN48aN0+nTpzVy5Eg1bdpUv//+u6RzI3yff/65Pv30Uw0cOFDbtm1Tnz591KlTp3wX34iIiJAkbd++vVA5CvveuVw48nihwnzGnE6nmjdvnmOk1fXzt7/9rVCZAcDKKFcA4OduueUWVatWTUuXLtWRI0f02Wef5Ri1eu+991SvXj0tX75cAwcOVExMjKKionTmzJkCH79ixYo5VsOT/hrJcfnggw+UkZGh999/Xw899JC6dOmiqKioXP/Bntc/+i9Wu3ZtSdLu3btz3Ld79273/SUhryyZmZnas2dPjizNmzfX+PHj9fnnn+uLL77Qvn37NGfOHPf9drtdHTt21PTp0/Xdd9/p2Wef1WeffZbv9Lk77rhDDodDixYtKlTmwr53BSnMZ6x+/fo6evSoOnbsqKioqBw/uY0+AoCvoVwBgJ+z2+2699579cEHH+itt97S2bNnc/zD1zWiYxiGe9v69euVkpJS4OPXr19faWlp2rZtm3vbgQMHtGLFigKfIy0tTQsWLMjxmGXKlMn1H/0Xa9OmjapWrao5c+Z4LOf98ccfa+fOneratWuBj+EtUVFRCgwM1Msvv+xxjPPmzVNaWpo7S3p6us6ePevxu82bN5fdbncfw9GjR3M8vmvFxvyWLa9Vq5aGDRumTz75RK+88kqO+51Op6ZNm+YeISvse1eQwnzGevfurX379un111/P8funT5/WyZMni/ScAGBFnHMFAD7s448/1q5du3Jsb9++verVq+e+3adPH73yyiuKi4tT8+bN1bhxY4/977zzTi1fvlx33XWXunbtqj179mjOnDlq0qSJTpw4kW+Gvn376sknn9Rdd92lkSNH6tSpU5o9e7auueYaj8UwoqOjFRgYqG7duumhhx7SiRMn9Prrr6tq1ao6cOCAx2O2bt1as2fP1jPPPKMGDRqoatWq7oUPLhQQEKCpU6dq6NCh6tChg/r16+deir1OnTruKYfecvjwYT3zzDM5ttetW1cDBgzQ2LFjFR8fr86dO6t79+7avXu3Zs2apeuvv1733X
efpHPLmY8YMUK9evXSNddco7Nnz+qtt96Sw+HQPffcI0l6+umn9fnnn6tr166qXbu2Dh06pFmzZqlmzZq66aab8s04bdo0/fTTTxo5cqSWL1+uO++8UxUrVtTevXu1bNky7dq1y708emHfu8Io6DM2cOBAvfvuuxo+fLhWr16tG2+8UdnZ2dq1a5feffddrVq16oq6IDYAP2XyaoUAgEuQ31LskowFCxZ47O90Oo1atWoZkoxnnnkmx+M5nU7jueeeM2rXrm0EBQUZ1113nfHhhx/mulS3LlqK3TAM45NPPjGaNWtmBAYGGo0aNTIWLVqU63Le77//vnHttdcawcHBRp06dYypU6ca8+fPNyQZe/bsce+XmppqdO3a1ShXrpwhyb0s+8VLsbssXbrUuO6664ygoCCjUqVKxoABA4zff//dY5/BgwcbZcqUyXHsueXMTYcOHfJ8vTt27Ojeb+bMmUZERIQREBBghIWFGQ8//LDx559/uu//+eefjfvvv9+oX7++ERwcbFSqVMm47bbbjE8//dS9T3JystGjRw+jevXqRmBgoFG9enWjX79+xvfff19gTsM4t7T+v//9b+Pmm282KlSoYAQEBBi1a9c2hg4dmmOZ9sK+d5KMRx55JM/nLOgzZhjnls6fOnWq0bRpUyMoKMioWLGi0bp1ayM+Pt5IS0sr1LEBgJXZDOOCuQsAAAAAgEvCOVcAAAAA4AWUKwAAAADwAsoVAAAAAHgB5QoAAAAAvIByBQAAAABeQLkCAAAAAC/gIsK5cDqd2r9/v8qVKyebzWZ2HAAAAAAmMQxDx48fV/Xq1WW35z82RbnKxf79+1WrVi2zYwAAAACwiN9++001a9bMdx/KVS7KlSsn6dwLWL58eVOzZGVl6ZNPPlF0dLQCAgJMzWK1PFbKYrU8ZPGNPFbKYrU8ZPGNPFbKYrU8VspitTxk8Y08VspitvT0dNWqVcvdEfJDucqFaypg+fLlLVGuQkJCVL58eUt8sK2Ux0pZrJaHLL6Rx0pZrJaHLL6Rx0pZrJbHSlmslocsvpHHSlmsojCnC7GgBQAAAAB4AeUKAAAAALyAcgUAAAAAXkC5AgAAAAAvoFwBAAAAgBdQrgAAAADACyhXAAAAAOAFlCsAAAAA8ALKFQAAAAB4AeUKAAAAALyAcgUAAAAAXkC5AgAAAAAvoFwBAAAAgBdQriws22lo/Z6j2nTEpvV7jirbaZgdCQAAAEAeSpkdALlL3HFA8R98pwNpZyQ59OYPG1WtQrDiujVR52bVzI4HAAAA4CKMXFlQ4o4DenjR5vPF6i+paWf08KLNStxxwKRkAAAAAPJCubKYbKeh+A++U24TAF3b4j/4jimCAAAAgMVQrixmw56jOUasLmRIOpB2Rhv2HC25UAAAAAAKRLmymEPH8y5Wl7IfAAAAgJJBubKYquWCvbofAAAAgJJBubKYtnUrqVqFYNnyuN8mqVqFYLWtW6kkYwEAAAAoAOXKYhx2m+K6NZGkHAXLdTuuWxM57HnVLwAAAABmoFxZUOdm1TT7vlYKr+A59a9y2UDNvq8V17kCAAAALIhyZVGdm1XTl0/erkX3t1F4aack6YnoRhQrAAAAwKIoVxbmsNvUrm4lRVx17vau1OOm5gEAAACQN8qVD6hR5twFg787kG5yEgAAAAB5oVz5gBoh58rVzgPpMgzD5DQAAAAAckO58gFhpaUAh03Hz5zV73+eNjsOAAAAgFxQrnxAKbtUv0pZSUwNBAAAAKyKcuUjGlcrJ+nc1EAAAAAA1kO58hGNwylXAAAAgJVRrnyEq1wxLRAAAACwJsqVj4g4X65+O3pa6WeyTE4DAAAA4GKUKx9xVUiAqlcIliTtOsDFhAEAAACroVz5ENfo1bv/+00pP/2hbCfXvAIAAACsopTZAVA4q749qA2/HJUkvbf5d723+XdVqxCsuG5N1LlZNZPTAQAAAGDkygd884dNjy75Ricysj22p6ad0cOLNi
txxwGTkgEAAABwoVxZXLbT0PJf7MptAqBrW/wH3zFFEAAAADAZ5criNv76p45l2vK835B0IO2MNuw5WnKhAAAAAORAubK4Q8czCrnfmWJOAgAAACA/lCuLq1ouqJD7BRdzEgAAAAD5oVxZXJvaFXVVoKG8JgbaJFWrEKy2dSuVZCwAAAAAF6FcWZzDbtPddZySlKNguW7HdWsihz3v87IAAAAAFD9LlKtXX31VderUUXBwsNq1a6cNGzbku/+yZcsUERGh4OBgNW/eXCtXrvS432az5frzz3/+szgPo9i0qGzolb4tFF7Bc+pfeIVgzb6vFde5AgAAACzA9HK1dOlSxcbGKi4uTps3b1aLFi0UExOjQ4cO5br/unXr1K9fPz3wwAPasmWLevbsqZ49e2rHjh3ufQ4cOODxM3/+fNlsNt1zzz0ldVheF9M0TF8+ebtm9GkhSQqw2/TFmNsoVgAAAIBFmF6upk+frmHDhmno0KFq0qSJ5syZo5CQEM2fPz/X/V966SV17txZo0ePVuPGjTV58mS1atVKM2fOdO8THh7u8fPf//5Xt912m+rVq1dSh1UsHHabul5bXXablOU0dPRkptmRAAAAAJxXyswnz8zM1KZNmzR27Fj3NrvdrqioKKWkpOT6OykpKYqNjfXYFhMTo4SEhFz3P3jwoD766CMtXLgwzxwZGRnKyPhryfP09HRJUlZWlrKysgp7OMXC9fwX5qhWIVj7jp3RnsPHVbG0w/Q8ZrFSFslaeciSNyvlsVIWyVp5yJI3K+WxUhbJWnmslEWyVh6y5M1KeayUxWxFeQ1shmEYxZglX/v371eNGjW0bt06RUZGurePGTNGa9eu1fr163P8TmBgoBYuXKh+/fq5t82aNUvx8fE6ePBgjv1feOEFPf/889q/f7+Cg3NfrnzSpEmKj4/PsX3x4sUKCQm5lEMrVq98a9eP6Xbd1yBb11cx7e0DAAAA/N6pU6fUv39/paWlqXz58vnua+rIVUmYP3++BgwYkGexkqSxY8d6jIalp6erVq1aio6OLvAFLG5ZWVlKSkpSp06dFBAQIEn6IuNb/bh5nypffY263Fbf9DxmsVIWq+Uhi2/ksVIWq+Uhi2/ksVIWq+WxUhar5SGLb+SxUhazuWa1FYap5So0NFQOhyPHiNPBgwcVHh6e6++Eh4cXev8vvvhCu3fv1tKlS/PNERQUpKCgnBfrDQgIsMyH6cIstSuXkSTtS8swLZ9VXxsrsFIesuTNSnmslEWyVh6y5M1KeayURbJWHitlkayVhyx5s1IeK2UxS1GO39QFLQIDA9W6dWslJye7tzmdTiUnJ3tME7xQZGSkx/6SlJSUlOv+8+bNU+vWrdWiRQvvBjfZ1ZXPTVX87egpk5MAAAAAcDF9WmBsbKwGDx6sNm3aqG3btpoxY4ZOnjypoUOHSpIGDRqkGjVqaMqUKZKkUaNGqUOHDpo2bZq6du2qJUuWaOPGjZo7d67H46anp2vZsmWaNm1aiR9TcatZkXIFAAAAWI3p5apPnz46fPiwJk6cqNTUVLVs2VKJiYkKCwuTJO3du1d2+18DbO3bt9fixYs1fvx4jRs3Tg0bNlRCQoKaNWvm8bhLliyRYRgeC1/4i6srnStXB9LPKPOsU4GlTF9RHwAAALjimV6uJGnEiBEaMWJErvetWbMmx7ZevXqpV69e+T7mgw8+qAcffNAb8SwntGygSgc4dDorW/uOnVbd0DJmRwIAAACueAx5+CCbzaZalUpLYmogAAAAYBWUKx9V6/x5V3spVwAAAIAlUK58VK3z51399iflCgAAALACypWPcpcrRq4AAAAAS6Bc+air3eXqtMlJAAAAAEiUK5/lWtCCc64AAAAAa6Bc+SjXghZpp7OUdjrL5DQAAAAAKFc+qkxQKVUuEyiJ864AAAAAK6Bc+TDXoha/s2IgAAAAYDrKlQ9zlSvOuwIAAADMR7nyYTUrBkuSPv/+iFJ++kPZTsPkRA
AAAMCVq5TZAXBpEncc0OL1v0mSvvzxiL788YiqVQhWXLcm6tysmsnpAAAAgCsPI1c+KHHHAT28aHOOVQJT087o4UWblbjjgEnJAAAAgCsX5crHZDsNxX/wnXKbAOjaFv/Bd0wRBAAAAEoY5crHbNhzVAfSzuR5vyHpQNoZbdhztORCAQAAAKBc+ZpDx/MuVpeyHwAAAADvoFz5mKrlgr26HwAAAADvoFz5mLZ1K6lahWDZ8rjfJqlahWC1rVupJGMBAAAAVzzKlY9x2G2K69ZEknIULNftuG5N5LDnVb8AAAAAFAfKlQ/q3KyaZt/XSuEVPKf+hVcI1uz7WnGdKwAAAMAEXETYR3VuVk2dmoSr/fPJOpieofjuTXTfDXUYsQIAAABMwsiVD3PYbSoTeK4fN65WgWIFAAAAmIhy5eNs5/uU0+CiwQAAAICZKFc+zjVa5XRSrgAAAAAzUa58nP380FU2I1cAAACAqShXPs41cpXNyBUAAABgKsqVj3NPC2TkCgAAADAV5crH2Wyuc65MDgIAAABc4ShXPs5xfrVAzrkCAAAAzEW58nGsFggAAABYA+XKx7FaIAAAAGANlCsf5ypXDFwBAAAA5qJc+TimBQIAAADWQLnycXaucwUAAABYAuXKx7FaIAAAAGANlCsf5zrnyqBcAQAAAKaiXPm4v6YFmhwEAAAAuMJRrnycg6XYAQAAAEugXPk4VgsEAAAArIFy5ePOD1zJycgVAAAAYCrKlY9zsBQ7AAAAYAmUKx/nOueKkSsAAADAXJQrH8dqgQAAAIA1UK58nJ1zrgAAAABLoFz5OFYLBAAAAKzB9HL16quvqk6dOgoODla7du20YcOGfPdftmyZIiIiFBwcrObNm2vlypU59tm5c6e6d++uChUqqEyZMrr++uu1d+/e4joEU9m5zhUAAABgCaaWq6VLlyo2NlZxcXHavHmzWrRooZiYGB06dCjX/detW6d+/frpgQce0JYtW9SzZ0/17NlTO3bscO/z008/6aabblJERITWrFmjbdu2acKECQoODi6pwypRjFwBAAAA1lDKzCefPn26hg0bpqFDh0qS5syZo48++kjz58/XU089lWP/l156SZ07d9bo0aMlSZMnT1ZSUpJmzpypOXPmSJL+8Y9/qEuXLnrhhRfcv1e/fv18c2RkZCgjI8N9Oz09XZKUlZWlrKysyzvIy+R6/rxyGOdHrLKys0ska0F5SpKVskjWykOWvFkpj5WySNbKQ5a8WSmPlbJI1spjpSyStfKQJW9WymOlLGYrymtgMwxz5pNlZmYqJCRE7733nnr27OnePnjwYB07dkz//e9/c/zO1VdfrdjYWD322GPubXFxcUpISNA333wjp9OpChUqaMyYMfryyy+1ZcsW1a1bV2PHjvV4jotNmjRJ8fHxObYvXrxYISEhl3OYxW75L3atPWBXVHWnutVmyUAAAADAm06dOqX+/fsrLS1N5cuXz3df00aujhw5ouzsbIWFhXlsDwsL065du3L9ndTU1Fz3T01NlSQdOnRIJ06c0PPPP69nnnlGU6dOVWJiou6++26tXr1aHTp0yPVxx44dq9jYWPft9PR01apVS9HR0QW+gMUtKytLSUlJ6tSpkwICAnLcvy1xt9Ye+FV16tVTl5hrTM9TkqyUxWp5yOIbeayUxWp5yOIbeayUxWp5rJTFannI4ht5rJTFbK5ZbYVh6rRAb3M6z43c9OjRQ48//rgkqWXLllq3bp3mzJmTZ7kKCgpSUFBQju0BAQGW+TDllaVUKcf5/7KVaFZfeG3MYqU8ZMmblfJYKYtkrTxkyZuV8lgpi2StPFbKIlkrD1nyZqU8VspilqIcv2kLWoSGhsrhcOjgwYMe2w8ePKjw8PBcfyc8PDzf/UNDQ1WqVCk1adLEY5/GjRv7/WqBrGcBAAAAmMu0chUYGKjWrVsrOTnZvc3pdCo5OVmRkZG5/k5kZKTH/pKUlJTk3j8wMFDXX3+9du
/e7bHP999/r9q1a3v5CKzB4S5XtCsAAADATKZOC4yNjdXgwYPVpk0btW3bVjNmzNDJkyfdqwcOGjRINWrU0JQpUyRJo0aNUocOHTRt2jR17dpVS5Ys0caNGzV37lz3Y44ePVp9+vTRLbfcottuu02JiYn64IMPtGbNGjMOsdjZzy/Fns3QFQAAAGAqU8tVnz59dPjwYU2cOFGpqalq2bKlEhMT3YtW7N27V3b7X4Nr7du31+LFizV+/HiNGzdODRs2VEJCgpo1a+be56677tKcOXM0ZcoUjRw5Uo0aNdJ//vMf3XTTTSV+fCXBwUWEAQAAAEswfUGLESNGaMSIEbnel9toU69evdSrV698H/P+++/X/fff7414lnd+4EomragPAAAA4DzTzrmCdzAtEAAAALAGypWPc7jLlclBAAAAgCsc5crHsVogAAAAYA2UKx93vltRrgAAAACTUa58nINzrgAAAABLoFz5OFe5YuQKAAAAMBflysfZbYxcAQAAAFZAufJxf5Urk4MAAAAAVzjKlY9znH8HuYgwAAAAYC7KlY9zj1xRrgAAAABTUa58HKsFAgAAANZAufJxdi4iDAAAAFgC5crH2V1LsbOgBQAAAGAqypWPc3DOFQAAAGAJlCsf51ot0Mk5VwAAAICpKFc+zsbIFQAAAGAJlCsf53AvaGFyEAAAAOAKR7nycQ73gha0KwAAAMBMlCsfZ+c6VwAAAIAlUK583PluxXWuAAAAAJNRrnycg4sIAwAAAJZAufJxTAsEAAAArIFy5ePcC1rQrQAAAABTUa58nOucK0auAAAAAHNRrnycnXOuAAAAAEugXPk4rnMFAAAAWAPlyse5Rq6yGbkCAAAATEW58nEO92qBJgcBAAAArnCUKx/nGrkyGLkCAAAATEW58nGO8+8g0wIBAAAAc1GufJz7nCsWtAAAAABMRbnycawWCAAAAFgD5crH/XWdK5ODAAAAAFc4ypWPs9tZih0AAACwAsqVj3PYmBYIAAAAWAHlysfZWS0QAAAAsATKlY/76zpXXOsKAAAAMBPlyse5pgVKLGoBAAAAmIly5eNcC1pIXOsKAAAAMBPlysc57BeOXFGuAAAAALNQrnzcBd2KcgUAAACYiHLl4+w2pgUCAAAAVkC58nEe0wKdJgYBAAAArnCUKx934WqBXOsKAAAAMI8lytWrr76qOnXqKDg4WO3atdOGDRvy3X/ZsmWKiIhQcHCwmjdvrpUrV3rcP2TIENlsNo+fzp07F+chmOaCbsW0QAAAAMBEpperpUuXKjY2VnFxcdq8ebNatGihmJgYHTp0KNf9161bp379+umBBx7Qli1b1LNnT/Xs2VM7duzw2K9z5846cOCA++edd94picMpcTabzb2oBRcRBgAAAMxjermaPn26hg0bpqFDh6pJkyaaM2eOQkJCNH/+/Fz3f+mll9S5c2eNHj1ajRs31uTJk9WqVSvNnDnTY7+goCCFh4e7fypWrFgSh2MK13lXTAsEAAAAzFPKzCfPzMzUpk2bNHbsWPc2u92uqKgopaSk5Po7KSkpio2N9dgWExOjhIQEj21r1qxR1apVVbFiRd1+++165plnVLly5VwfMyMjQxkZGe7b6enpkqSsrCxlZWVdyqF5jev588txbsVAQxmZWcrKKt63tDB5SoqVskjWykOWvFkpj5WySNbKQ5a8WSmPlbJI1spjpSyStfKQJW9WymOlLGYrymtgM0ycS7Z//37VqFFD69atU2RkpHv7mDFjtHbtWq1fvz7H7wQGBmrhwoXq16+fe9usWbMUHx+vgwcPSpKWLFmikJAQ1a1bVz/99JPGjRunsmXLKiUlRQ6HI8djTpo0SfHx8Tm2L168WCEhId441GI1er1DmU6bJlx3VqHBZqcBAAAA/MepU6fUv39/paWlqXz58vnua+rIVXHp27ev+7+bN2+ua6+9VvXr19eaNWvUsWPHHPuPHTvWYzQsPT1dtWrVUnR0dIEvYH
HLyspSUlKSOnXqpICAgFz3Gbc5WZkZ2erQ4VbVrly8ZbAweUqKlbJYLQ9ZfCOPlbJYLQ9ZfCOPlbJYLY+VslgtD1l8I4+VspjNNautMEwtV6GhoXI4HO4RJ5eDBw8qPDw8198JDw8v0v6SVK9ePYWGhurHH3/MtVwFBQUpKCgox/aAgADLfJjyy+Jajt3mcJRYXl95bcxgpTxkyZuV8lgpi2StPGTJm5XyWCmLZK08VsoiWSsPWfJmpTxWymKWohy/qQtaBAYGqnXr1kpOTnZvczqdSk5O9pgmeKHIyEiP/SUpKSkpz/0l6ffff9cff/yhatWqeSe4xbgWtHCyFDsAAABgGtNXC4yNjdXrr7+uhQsXaufOnXr44Yd18uRJDR06VJI0aNAgjwUvRo0apcTERE2bNk27du3SpEmTtHHjRo0YMUKSdOLECY0ePVpff/21fvnlFyUnJ6tHjx5q0KCBYmJiTDnG4ma3sVogAAAAYDbTz7nq06ePDh8+rIkTJyo1NVUtW7ZUYmKiwsLCJEl79+6V3f5XB2zfvr0WL16s8ePHa9y4cWrYsKESEhLUrFkzSZLD4dC2bdu0cOFCHTt2TNWrV1d0dLQmT56c69Q/f2B3j1yZHAQAAAC4gpleriRpxIgR7pGni61ZsybHtl69eqlXr1657l+6dGmtWrXKm/Esz3XOlZORKwAAAMA0pk8LxOVzX0SYc64AAAAA01Cu/MD5gSvOuQIAAABMRLnyA66RKxOvBw0AAABc8ShXfsB1zlU2C1oAAAAApqFc+QE751wBAAAApqNc+YHz3YrVAgEAAAATUa78gJ2l2AEAAADTUa78AEuxAwAAAOajXPkBV7li5AoAAAAwD+XKD9hYLRAAAAAwXZHL1enTp3Xq1Cn37V9//VUzZszQJ5984tVgKDwHC1oAAAAApityuerRo4fefPNNSdKxY8fUrl07TZs2TT169NDs2bO9HhAFc08L5JwrAAAAwDRFLlebN2/WzTffLEl67733FBYWpl9//VVvvvmmXn75Za8HRMFcqwVmM3IFAAAAmKbI5erUqVMqV66cJOmTTz7R3XffLbvdrhtuuEG//vqr1wOiYO5yxcgVAAAAYJoil6sGDRooISFBv/32m1atWqXo6GhJ0qFDh1S+fHmvB0TBXNMCGbgCAAAAzFPkcjVx4kQ98cQTqlOnjtq1a6fIyEhJ50axrrvuOq8HRMHsXOcKAAAAMF2pov7Cvffeq5tuukkHDhxQixYt3Ns7duyou+66y6vhUDiu1QI55woAAAAwT5HLlSSFh4crPDxckpSenq7PPvtMjRo1UkREhFfDoXBc51yxWiAAAABgniJPC+zdu7dmzpwp6dw1r9q0aaPevXvr2muv1X/+8x+vB0TBXNMC6VYAAACAeYpcrj7//HP3UuwrVqyQYRg6duyYXn75ZT3zzDNeD4iCOViKHQAAADBdkctVWlqaKlWqJElKTEzUPffco5CQEHXt2lU//PCD1wOiYFxEGAAAADBfkctVrVq1lJKSopMnTyoxMdG9FPuff/6p4OBgrwdEwWyuBS0oVwAAAIBpirygxWOPPaYBAwaobNmyql27tm699VZJ56YLNm/e3Nv5UAjukSumBQIAAACmKXK5+tvf/qa2bdvqt99+U6dOnWS3nxv8qlevHudcmcR1zhXlCgAAADDPJS3F3qZNG7Vp00aGYcgwDNlsNnXt2tXb2VBIf11E2OQgAAAAwBWsyOdcSdKbb76p5s2bq3Tp0ipdurSuvfZavfXWW97OhkJi5AoAAAAwX5FHrqZPn64JEyZoxIgRuvHGGyVJX375pYYPH64jR47o8ccf93pI5O/8zEwWtAAAAABMVORy9corr2j27NkaNGiQe1v37t3VtGlTTZo0iXJlAjsjVwAAAIDpijwt8MCBA2rfvn2O7e3bt9eBAwe8EgpFw3WuAAAAAPMVuVw1aNBA7777bo7tS5cuVcOGDb0SCkXjGrnKZuQKAA
AAME2RpwXGx8erT58++vzzz93nXH311VdKTk7OtXSh+LnLFasFAgAAAKYp8sjVPffco/Xr1ys0NFQJCQlKSEhQaGioNmzYoLvuuqs4MqIAjvPvosHIFQAAAGCaS7rOVevWrbVo0SKPbYcOHdJzzz2ncePGeSUYCu+v61xRrgAAAACzXNJ1rnJz4MABTZgwwVsPhyJwcM4VAAAAYDqvlSuYx70UOyNXAAAAgGkoV37ANS2QbgUAAACYh3LlB5gWCAAAAJiv0AtaxMbG5nv/4cOHLzsMLo1rtUCmBQIAAADmKXS52rJlS4H73HLLLZcVBpfGZmO1QAAAAMBshS5Xq1evLs4cuAwOzrkCAAAATMc5V37Adc6Vk3OuAAAAANNQrvwAFxEGAAAAzEe58gPnuxWrBQIAAAAmolz5Adc5VwblCgAAADAN5coP2FktEAAAADDdJZWrY8eO6ZNPPtGiRYv05ptvevxcildffVV16tRRcHCw2rVrpw0bNuS7/7JlyxQREaHg4GA1b95cK1euzHPf4cOHy2azacaMGZeUzRc43OdcmRwEAAAAuIIVeil2lw8++EADBgzQiRMnVL58efc1lqRz11saNGhQkR5v6dKlio2N1Zw5c9SuXTvNmDFDMTEx2r17t6pWrZpj/3Xr1qlfv36aMmWK7rzzTi1evFg9e/bU5s2b1axZM499V6xYoa+//lrVq1cv6mH6FNc5V6wWCAAAAJinyCNXf//733X//ffrxIkTOnbsmP7880/3z9GjR4scYPr06Ro2bJiGDh2qJk2aaM6cOQoJCdH8+fNz3f+ll15S586dNXr0aDVu3FiTJ09Wq1atNHPmTI/99u3bp0cffVRvv/22AgICipzLl9hZih0AAAAwXZFHrvbt26eRI0cqJCTksp88MzNTmzZt0tixY93b7Ha7oqKilJKSkuvvpKSkKDY21mNbTEyMEhIS3LedTqcGDhyo0aNHq2nTpgXmyMjIUEZGhvt2enq6JCkrK0tZWVlFOSSvcz1/vjmMc/MBz2Y7iz1vofKUECtlkayVhyx5s1IeK2WRrJWHLHmzUh4rZZGslcdKWSRr5SFL3qyUx0pZzFaU18BmFHGJubvvvlt9+/ZV7969ixzsYvv371eNGjW0bt06RUZGurePGTNGa9eu1fr163P8TmBgoBYuXKh+/fq5t82aNUvx8fE6ePCgJGnKlClavXq1Vq1aJZvNpjp16uixxx7TY489lmuOSZMmKT4+Psf2xYsXe6VEFreNh21660eHrqng1CNNOPEKAAAA8JZTp06pf//+SktLU/ny5fPdt8gjV127dtXo0aP13XffqXnz5jmm3HXv3r2oD+lVmzZt0ksvvaTNmzd7nA+Wn7Fjx3qMhqWnp6tWrVqKjo4u8AUsbllZWUpKSlKnTp3ynN7o3HZAb/24XRUrVVaXLtebnqekWCmL1fKQxTfyWCmL1fKQxTfyWCmL1fJYKYvV8pDFN/JYKYvZXLPaCqPI5WrYsGGSpKeffjrHfTabTdnZ2YV+rNDQUDkcDveIk8vBgwcVHh6e6++Eh4fnu/8XX3yhQ4cO6eqrr3bfn52drb///e+aMWOGfvnllxyPGRQUpKCgoBzbAwICLPNhyi9L4PnthmwlltdXXhszWCkPWfJmpTxWyiJZKw9Z8malPFbKIlkrj5WySNbKQ5a8WSmPlbKYpSjHX+QFLZxOZ54/RSlW0rkpfq1bt1ZycrLH4ycnJ3tME7xQZGSkx/6SlJSU5N5/4MCB2rZtm7Zu3er+qV69ukaPHq1Vq1YV8Wh9g+P8u+jkOlcAAACAaYo8cuVtsbGxGjx4sNq0aaO2bdtqxowZOnnypIYOHSpJGjRokGrUqKEpU6ZIkkaNGqUOHTpo2rRp6tq1q5YsWaKNGzdq7ty5kqTKlSurcuXKHs8REBCg8PBwNWrUqGQProS4LyLMaoEAAACAaS6pXK1du1b/+te/tHPnTklSkyZNNHr0aN18881Ffq
w+ffro8OHDmjhxolJTU9WyZUslJiYqLCxMkrR3717Z7X8NsLVv316LFy/W+PHjNW7cODVs2FAJCQk5rnF1JXEvxc7IFQAAAGCaIperRYsWaejQobr77rs1cuRISdJXX32ljh076o033lD//v2LHGLEiBEaMWJErvetWbMmx7ZevXqpV69ehX783M6z8icOu+s6VyYHAQAAAK5gRS5Xzz77rF544QU9/vjj7m0jR47U9OnTNXny5EsqV7g89vPlKpt2BQAAAJimyAta/Pzzz+rWrVuO7d27d9eePXu8EgpF43BNC+ScKwAAAMA0RS5XtWrVyrFanyR9+umnqlWrlldCoWjOD1wxcgUAAACYqMjTAv/+979r5MiR2rp1q9q3by/p3DlXb7zxhl566SWvB0TB7HZGrgAAAACzFblcPfzwwwoPD9e0adP07rvvSpIaN26spUuXqkePHl4PiIKxoAUAAABgvktaiv2uu+7SXXfd5e0suETu61zRrgAAAADTFPmcK1gP51wBAAAA5ivUyFWlSpX0/fffKzQ0VBUrVpTt/EhJbo4ePeq1cCgcB+dcAQAAAKYrVLl68cUXVa5cOfd/51euUPLsLMUOAAAAmK5Q5Wrw4MHu/x4yZEhxZcElcrgvImxyEAAAAOAKVuRzrhwOhw4dOpRj+x9//CGHw+GVUCgaRq4AAAAA8xW5XBl5/AM+IyNDgYGBlx0IRec4/y6yoAUAAABgnkIvxf7yyy9Lkmw2m/7973+rbNmy7vuys7P1+eefKyIiwvsJUSBGrgAAAADzFbpcvfjii5LOjVzNmTPHYwpgYGCg6tSpozlz5ng/IQrkXi2QkSsAAADANIUuV3v27JEk3XbbbVq+fLkqVqxYbKFQNO6LCDNyBQAAAJim0OXKZfXq1cWRA5fB7h65MjkIAAAAcAUrcrmSpN9//13vv/++9u7dq8zMTI/7pk+f7pVgKDwH51wBAAAApityuUpOTlb37t1Vr1497dq1S82aNdMvv/wiwzDUqlWr4siIAthdqwVSrgAAAADTFHkp9rFjx+qJJ57Q9u3bFRwcrP/85z/67bff1KFDB/Xq1as4MqIArnOuDCPvpfIBAAAAFK8il6udO3dq0KBBkqRSpUrp9OnTKlu2rJ5++mlNnTrV6wFRMNe0QIlrXQEAAABmKXK5KlOmjPs8q2rVqumnn35y33fkyBHvJUOhuRa0kCS6FQAAAGCOIp9zdcMNN+jLL79U48aN1aVLF/3973/X9u3btXz5ct1www3FkREFcHiUK9oVAAAAYIYil6vp06frxIkTkqT4+HidOHFCS5cuVcOGDVkp0CQXdCumBQIAAAAmKXK5qlevnvu/y5Qpozlz5ng1EIrOfuE5V4xcAQAAAKYo8jlXsJ4LpwUaXEgYAAAAMEWRR67sdrtsF4yUXCw7O/uyAqHoHIxcAQAAAKYrcrlasWKFx+2srCxt2bJFCxcuVHx8vNeCofAuXC2Qc64AAAAAcxS5XPXo0SPHtnvvvVdNmzbV0qVL9cADD3glGIrGbju3DDurBQIAAADm8No5VzfccIOSk5O99XAoItd5V5QrAAAAwBxeKVenT5/Wyy+/rBo1anjj4XAJXCsGMi0QAAAAMEeRpwVWrFjRY0ELwzB0/PhxhYSEaNGiRV4Nh8Jzj1yxWiAAAABgiiKXqxdffNGjXNntdlWpUkXt2rVTxYoVvRoOheceuWJaIAAAAGCKIperIUOGFEMMXC7XgoGccwUAAACYo1Dlatu2bYV+wGuvvfaSw+DS/TUtkHIFAAAAmKFQ5aply5ay2WwyChgVsdlsXETYJK5yxbRAAAAAwByFKld79uwp7hy4TDZWCwQAAABMVahyVbt27eLOgcvkOF+uGLgCAAAAzFHkBS1cvvvuO+3du1eZmZke27t3737ZoVB07mmBjFwBAAAApihyufr555911113afv27R7nYbmnpXHOlSns5y8HzTlXAAAAgDnsRf2FUaNGqW
7dujp06JBCQkL07bff6vPPP1ebNm20Zs2aYoiIwnBd54rVAgEAAABzFHnkKiUlRZ999plCQ0Nlt9tlt9t10003acqUKRo5cqS2bNlSHDlRAAcLWgAAAACmKvLIVXZ2tsqVKydJCg0N1f79+yWdW/Ri9+7d3k2HQrO7rnNFtwIAAABMUeSRq2bNmumbb75R3bp11a5dO73wwgsKDAzU3LlzVa9eveLIiEJwjVw5OecKAAAAMEWRy9X48eN18uRJSdLTTz+tO++8UzfffLMqV66spUuXej0gCud8t2JaIAAAAGCSIpermJgY9383aNBAu3bt0tGjR1WxYkX3ioEoee6l2Bm5AgAAAExR5HOuFi1a5B65cqlUqdJlFatXX31VderUUXBwsNq1a6cNGzbku/+yZcsUERGh4OBgNW/eXCtXrvS4f9KkSYqIiFCZMmVUsWJFRUVFaf369Zeczxe4ypVBuQIAAABMUeRy9fjjjyssLEz9+/fXypUrL/u6VkuXLlVsbKzi4uK0efNmtWjRQjExMTp06FCu+69bt079+vXTAw88oC1btqhnz57q2bOnduzY4d7nmmuu0cyZM7V9+3Z9+eWXqlOnjqKjo3X48OHLympldvdqgSYHAQAAAK5QRS5XBw4c0JIlS2Sz2dS7d29Vq1ZNjzzyiNatW3dJAaZPn65hw4Zp6NChatKkiebMmaOQkBDNnz8/1/1feuklde7cWaNHj1bjxo01efJktWrVSjNnznTv079/f0VFRalevXpq2rSppk+frvT0dG3btu2SMvoCO+dcAQAAAKYq8jlXpUqV0p133qk777xTp06d0ooVK7R48WLddtttqlmzpn766adCP1ZmZqY2bdqksWPHurfZ7XZFRUUpJSUl199JSUlRbGysx7aYmBglJCTk+Rxz585VhQoV1KJFi1z3ycjIUEZGhvt2enq6JCkrK0tZWVmFPp7i4Hr+gnK4ylVmMWcubJ6SYKUskrXykCVvVspjpSyStfKQJW9WymOlLJK18lgpi2StPGTJm5XyWCmL2YryGtiMyzxJ58iRI1qyZInmzJmjnTt3Fmma4P79+1WjRg2tW7dOkZGR7u1jxozR2rVrcz1PKjAwUAsXLlS/fv3c22bNmqX4+HgdPHjQve3DDz9U3759derUKVWrVk0JCQm6/vrrc80xadIkxcfH59i+ePFihYSEFPp4zPTKtw79mG7TkIbZui6U0SsAAADAG06dOqX+/fsrLS1N5cuXz3ffIo9cuZ5gxYoVevvtt5WcnKxatWqpX79+eu+99y4pcHG47bbbtHXrVh05ckSvv/66evfurfXr16tq1ao59h07dqzHaFh6erpq1aql6OjoAl/A4paVlaWkpCR16tRJAQEBee635OBG/Zh+VNe2bKku11YzPU9JsFIWq+Uhi2/ksVIWq+Uhi2/ksVIWq+WxUhar5SGLb+SxUhazuWa1FUaRy1Xfvn314YcfKiQkRL1799aECRM8Rp2KIjQ0VA6Hw2PESZIOHjyo8PDwXH8nPDy8UPuXKVNGDRo0UIMGDXTDDTeoYcOGmjdvnscURJegoCAFBQXl2B4QEGCZD1NBWRz2c6fP2ez2EsnsS69NSbNSHrLkzUp5rJRFslYesuTNSnmslEWyVh4rZZGslYcsebNSHitlMUtRjr/IC1o4HA69++67OnDggGbOnHnJxUo6N8WvdevWSk5Odm9zOp1KTk7O83EjIyM99pekpKSkAnM4nU6P86r8jd3OaoEAAACAmYo8cvX22297NUBsbKwGDx6sNm3aqG3btpoxY4ZOnjypoUOHSpIGDRqkGjVqaMqUKZKkUaNGqUOHDpo2bZq6du2qJUuWaOPGjZo7d64k6eTJk3r22WfVvXt3VatWTUeOHNGrr76qffv2qVevXl7NbiWO8wtaOLnOFQAAAGCKQo9cdenSRWlpae7bzz//vI4dO+a+/ccff6hJkyZFDtCnTx/961//0sSJE9WyZUtt3b
pViYmJCgsLkyTt3btXBw4ccO/fvn17LV68WHPnzlWLFi303nvvKSEhQc2aNZN0bmRt165duueee3TNNdeoW7du+uOPP/TFF1+oadOmRc7nK1wXEXayFDsAAABgikKPXK1atcpjWt1zzz2n3r1766qrrpIknT17Vrt3776kECNGjNCIESNyvW/NmjU5tvXq1SvPUajg4GAtX778knL4MpvrIsKMXAEAAACmKPTI1cUrtl/mCu7wMoeNkSsAAADATEVe0ALW5J4WSLcCAAAATFHocmWz2dxTzy7cBmv4a7VA2hUAAABghkKfc2UYhoYMGeK+HtSZM2c0fPhwlSlTRpL8eplzX2BntUAAAADAVIUuV4MHD/a4fd999+XYZ9CgQZefCJfEdc4VI1cAAACAOQpdrhYsWFCcOXCZ7JxzBQAAAJiKBS38hHu1QKYFAgAAAKagXPkJ+/l3kmmBAAAAgDkoV37CzjlXAAAAgKkoV37CdZ0rLu4MAAAAmINy5SfcI1eUKwAAAMAUlCs/8de0QJODAAAAAFcoypWfcJx/J1ktEAAAADAH5cpPuK9zxYIWAAAAgCkoV37CwTlXAAAAgKkoV37Cdc4VI1cAAACAOShXfsI1LZCRKwAAAMAclCs/4ZoWyMAVAAAAYA7KlZ9wrxZIuwIAAABMQbnyEzb3da4oVwAAAIAZKFd+wsE5VwAAAICpKFd+wsFqgQAAAICpKFd+wn0RYboVAAAAYArKlZ84362YFggAAACYhHLlJ1znXDEtEAAAADAH5cpP2FktEAAAADAV5cpPODjnCgAAADAV5cpPuFcL5JwrAAAAwBSUKz9hcy1owdAVAAAAYArKlZ/4a1og5QoAAAAwA+XKT1CuAAAAAHNRrvwEqwUCAAAA5qJc+QlXuXI6TQ4CAAAAXKEoV37Ccf6dzGZaIAAAAGAKypWfsLMUOwAAAGAqypWfcC9owTlXAAAAgCkoV37CvaAFI1cAAACAKShXfsJud60WaHIQAAAA4ApFufITjvMjVwYjVwAAAIApKFd+wu5aLZBzrgAAAABTUK78BOdcAQAAAOaiXPkJVgsEAAAAzEW58hN/XefK5CAAAADAFYpy5Scc7tUCaVcAAACAGShXfuJ8t5KTc64AAAAAU1iiXL366quqU6eOgoOD1a5dO23YsCHf/ZctW6aIiAgFBwerefPmWrlypfu+rKwsPfnkk2revLnKlCmj6tWra9CgQdq/f39xH4ap3AtaMHIFAAAAmML0crV06VLFxsYqLi5OmzdvVosWLRQTE6NDhw7luv+6devUr18/PfDAA9qyZYt69uypnj17aseOHZKkU6dOafPmzZowYYI2b96s5cuXa/fu3erevXtJHlaJcy9oQbcCAAAATGF6uZo+fbqGDRumoUOHqkmTJpozZ45CQkI0f/78XPd/6aWX1LlzZ40ePVqNGzfW5MmT1apVK82cOVOSVKFCBSUlJal3795q1KiRbrjhBs2cOVObNm3S3r17S/LQStRf5Yp2BQAAAJihlJlPnpmZqU2bNmns2LHubXa7XVFRUUpJScn1d1JSUhQbG+uxLSYmRgkJCXk+T1pammw2m6666qpc78/IyFBGRob7dnp6uqRzUwyzsrIKeTTFw/X8BeXIPnv23P86ncWaubB5SoKVskjWykOWvFkpj5WySNbKQ5a8WSmPlbJI1spjpSyStfKQJW9WymOlLGYrymtgMwzzhjr279+vGjVqaN26dYqMjHRvHzNmjNauXav169fn+J3AwEAtXLhQ/fr1c2+bNWuW4uPjdfDgwRz7nzlzRjfeeKMiIiL09ttv55pj0qRJio+Pz7F98eLFCgkJuZRDK3GHTkvPbi2lYIehqW2zzY4DAAAA+IVTp06pf//+SktLU/ny5fPd19SRq+KWlZWl3r17yzAMzZ49O8/9xo4d6zEalp6erlq1aik6OrrAF7C4ZWVlKSkpSZ06dVJAQECe+/169JSe3fqlHI5S6tIlxv
Q8JcFKWayWhyy+kcdKWayWhyy+kcdKWayWx0pZrJaHLL6Rx0pZzOaa1VYYppar0NBQORyOHCNOBw8eVHh4eK6/Ex4eXqj9XcXq119/1WeffZZvSQoKClJQUFCO7QEBAZb5MBWUJej8fdmGUSKZfem1KWlWykOWvFkpj5WySNbKQ5a8WSmPlbJI1spjpSyStfKQJW9WymOlLGYpyvGbuqBFYGCgWrdureTkZPc2p9Op5ORkj2mCF4qMjPTYX5KSkpI89ncVqx9++EGffvqpKleuXDwHYCF214IWTpODAAAAAFco06cFxsbGavDgwWrTpo3atm2rGTNm6OTJkxo6dKgkadCgQapRo4amTJkiSRo1apQ6dOigadOmqWvXrlqyZIk2btyouXPnSjpXrO69915t3rxZH374obKzs5WamipJqlSpkgIDA8050GLmcF3nitUCAQAAAFOYXq769Omjw4cPa+LEiUpNTVXLli2VmJiosLAwSdLevXtlt/81wNa+fXstXrxY48eP17hx49SwYUMlJCSoWbNmkqR9+/bp/ffflyS1bNnS47lWr16tW2+9tUSOq6S5XiKWYgcAAADMYXq5kqQRI0ZoxIgRud63Zs2aHNt69eqlXr165bp/nTp1ZOICiKZxjVwZhmQYhmznbwMAAAAoGaZfRBjeYb+gTGU7r7xyCQAAAJiNcuUnXAtaSJx3BQAAAJiBcuUnHBeUK1YMBAAAAEoe5cpPOC6YFsiiFgAAAEDJo1z5iQvXr2BaIAAAAFDyKFd+wnNaIOUKAAAAKGmUKz/hYLVAAAAAwFSUKz9x4WqBdCsAAACg5FGu/IirX7GgBQAAAFDyKFd+xHXeFdMCAQAAgJJHufIjdhvlCgAAADAL5cqPuEaumBUIAAAAlDzKlR9xj1zRrgAAAIASR7nyI64FLZgWCAAAAJQ8ypUfcU0LZLVAAAAAoORRrvwI5QoAAAAwD+XKj9hYLRAAAAAwDeXKjzjOlyun0+QgAAAAwBWIcuVH3BcRZlogAAAAUOIoV37Efv7d5JwrAAAAoORRrvyI3T0tkHIFAAAAlDTKlR9xsKAFAAAAYBrKlR+xc84VAAAAYBrKlR9xjVzRrQAAAICSR7nyI+6RK6YFAgAAACWOcuVHzncrpgUCAAAAJqBc+RHXda5YLRAAAAAoeZQrP+Jeip1uBQAAAJQ4ypUfcXDOFQAAAGAaypUfcZ1z5eScKwAAAKDEUa78iJ2LCAMAAACmoVz5EfeCFoxcAQAAACWOcuVHKFcAAACAeShXfsTmnhZochAAAADgCkS58iMO14IWnHMFAAAAlDjKlR9hWiAAAABgHsqVH3GvFki5AgAAAEoc5cqPuMoV0wIBAACAkke58iOuaYFc5woAAAAoeZQrP2J3lSu6FQAAAFDiKFd+xLVaoME5VwAAAECJo1z5EfeCFkwLBAAAAEoc5cqP/DUtkHIFAAAAlDTKlR9xsFogAAAAYBrKlR+xuy8ibHIQAAAA4Apkerl69dVXVadOHQUHB6tdu3basGFDvvsvW7ZMERERCg4OVvPmzbVy5UqP+5cvX67o6GhVrlxZNptNW7duLcb01nK+W3HOFQAAAGACU8vV0qVLFRsbq7i4OG3evFktWrRQTEyMDh06lOv+69atU79+/fTAAw9oy5Yt6tmzp3r27KkdO3a49zl58qRuuukmTZ06taQOwzIc7pEryhUAAABQ0kwtV9OnT9ewYcM0dOhQNWnSRHPmzFFISIjmz5+f6/4vvfSSOnfurNGjR6tx48aaPHmyWrVqpZkzZ7r3GThwoCZOnKioqKiSOgzLYLVAAAAAwDylzHrizMxMbdq0SWPHjnVvs9vtioqKUkpKSq6/k5KSotjYWI9tMTExSkhIuKwsGRkZysjIcN9OT0+XJGVlZSkrK+uyHvtyuZ6/MDlsOleqzmZnF1vuouQpblbKIlkrD1nyZqU8VsoiWSsPWfJmpTxWyiJZK4+VskjWykOWvFkpj5
WymK0or4HNMOmKs/v371eNGjW0bt06RUZGurePGTNGa9eu1fr163P8TmBgoBYuXKh+/fq5t82aNUvx8fE6ePCgx76//PKL6tatqy1btqhly5b5Zpk0aZLi4+NzbF+8eLFCQkKKeGTmSfjFrtUH7Lq9mlM96jjNjgMAAAD4vFOnTql///5KS0tT+fLl893XtJErKxk7dqzHiFh6erpq1aql6OjoAl/A4paVlaWkpCR16tRJAQEB+e67Y9X3Wn3gF9WuW1dd7mhkep7iZqUsVstDFt/IY6UsVstDFt/IY6UsVstjpSxWy0MW38hjpSxmc81qKwzTylVoaKgcDkeOEaeDBw8qPDw8198JDw8v0v6FFRQUpKCgoBzbAwICLPNhKkyWgFIOSZIhW7Hn9rXXpiRZKQ9Z8malPFbKIlkrD1nyZqU8VsoiWSuPlbJI1spDlrxZKY+VspilKMdv2oIWgYGBat26tZKTk93bnE6nkpOTPaYJXigyMtJjf0lKSkrKc/8rjWu1QJNmegIAAABXNFOnBcbGxmrw4MFq06aN2rZtqxkzZujkyZMaOnSoJGnQoEGqUaOGpkyZIkkaNWqUOnTooGnTpqlr165asmSJNm7cqLlz57of8+jRo9q7d6/2798vSdq9e7ekc6NelzvCZXU212qBlCsAAACgxJlarvr06aPDhw9r4sSJSk1NVcuWLZWYmKiwsDBJ0t69e2W3/zW41r59ey1evFjjx4/XuHHj1LBhQyUkJKhZs2bufd5//313OZOkvn37SpLi4uI0adKkkjkwkzjcS7GbHAQAAAC4Apm+oMWIESM0YsSIXO9bs2ZNjm29evVSr1698ny8IUOGaMiQIV5K51sc53uok+tcAQAAACXO1IsIw7vs58+5cjItEAAAAChxlCs/YuecKwAAAMA0lCs/4jrnimmBAAAAQMmjXPkR17TAbLoVAAAAUOIoV37Eca5bcc4VAAAAYALKlR9xL2jBtEAAAACgxFGu/Ih7QQvKFQAAAFDiKFd+xMFS7AAAAIBpKFd+xL1aIN0KAAAAKHGUKz9yvlsxLRAAAAAwAeXKjzAtEAAAADAP5cqPuMoVI1cAAABAyaNc+RG7jZErAAAAwCyUKz/iLldOk4MAAAAAVyDKlR9xnH83sxm5AgAAAEoc5cqPcBFhAAAAwDyUKz/iWtDCYOQKAAAAKHGUKz/iHrmiXAEAAAAljnLlR+zupdhNDgIAAABcgShXfsThXi2QkSsAAACgpFGu/Iid1QIBAAAA01Cu/AgXEQYAAADMQ7nyI67VApkWCAAAAJQ8ypUfYbVAAAAAwDyUKz/y18iVyUEAAACAKxDlyo84OOcKAAAAMA3lyo+c71bK5pwrAAAAoMRRrvyIe1ogI1cAAABAiaNc+RFXuWLkCgAAACh5lCs/8td1rkwOAgAAAFyBKFd+5PzAFde5AgAAAExAufIj7mmBnHMFAAAAlDjKlR9xX0SYkSsAAACgxFGu/Ihr5IqBKwAAAKDkUa78iHvkinYFAAAAlDjKlR+xn383mRYIAAAAlDzKlR9xnB+5klgxEAAAAChplCs/4jrnSpKcTA0EAAAAShTlyo/YLhi54rwrAAAAoGRRrvyIx8iV08QgAAAAwBWIcuVHHIxcAQAAAKahXPkRQ38VqvU//8GqgQAAAEAJKmV2AHhH4o4DmvT+d+7bDyzcqPDyQerX9mrVCS2jquWC1bZuJY+pg9lOQxv2HNWh42dyvR8AAABA4VGu/EDijgN6eNFmXTxOlZqeoRc//cF9+8Ky9cuRU3pnw16lpp/J9f6q5YLVunZFbfr1T3f5al27ojbsOapNR2yqvOeo2tarkuP+C29T1q4MF5b00DJBkk06ciIj189MZIOqfCZQoGynofV81xQL/k81AChelCsfl+00FP/BdzmKVW4uLlsF3W+3SRfOLPzrtkNv/rAxn/vPqVQmQHe1rKGoJuG5FrXLvV2UolcSt115Kv70hxylSnkUDL
OyFPdrk1tJv9DFn5nCFHhfeW1yK5JWfZ+8nccbx164z1ThvmuK83MVWiZIZ7PP+uT7VJg/r/m9doV5n331M8yf7ypePVZffW386c93cXzGrfI+VS3nW/9HkM0wzF/54NVXX9U///lPpaamqkWLFnrllVfUtm3bPPdftmyZJkyYoF9++UUNGzbU1KlT1aVLF/f9hmEoLi5Or7/+uo4dO6Ybb7xRs2fPVsOGDQuVJz09XRUqVFBaWprKly9/2cd3ObKysrRy5Up16dJFAQEBOe5P+ekP9Xv9axOSFU1B/zjyt9sXMjtLSR5rUZmdnff50m9fyEqfqeLOY/brXtyvrT8f+5X8vnOsed++kNlZOPa8b1erEKy4bk3UuVm13A+gmBWlG5i+oMXSpUsVGxuruLg4bd68WS1atFBMTIwOHTqU6/7r1q1Tv3799MADD2jLli3q2bOnevbsqR07drj3eeGFF/Tyyy9rzpw5Wr9+vcqUKaOYmBidOZP7/7vuyw4d941juvgPs7/ftlKWkjzWojI7O+/zpd8uqce+FL50rFb78+rPx34lv+8ca963rZSFY8/7dmraGT28aLMSdxyQ1Zk+ctWuXTtdf/31mjlzpiTJ6XSqVq1aevTRR/XUU0/l2L9Pnz46efKkPvzwQ/e2G264QS1bttScOXNkGIaqV6+uv//973riiSckSWlpaQoLC9Mbb7yhvn37FpiJkSsAAADAOmySwisE68snby/xKYJF6QamnnOVmZmpTZs2aezYse5tdrtdUVFRSklJyfV3UlJSFBsb67EtJiZGCQkJkqQ9e/YoNTVVUVFR7vsrVKigdu3aKSUlJddylZGRoYyMDPft9PR0SeeKTVZW1iUfnze4nj+vHNfVLKfw8kE6mJ5RqPOuAAAAAF9jSDqQdkYpPx5Su7qVSvS5i9IHTC1XR44cUXZ2tsLCwjy2h4WFadeuXbn+Tmpqaq77p6amuu93bctrn4tNmTJF8fHxObZ/8sknCgkJKdzBFLOkpKQ87+sSbtP8dNcMz5Jt8gAAAEBJ+eSL9fpjZ8kOKZw6darQ+7JaoKSxY8d6jIalp6erVq1aio6OtsS0wKSkJHXq1CnXaYGS1EVSq28P6pmVu5SanpHrPgAAAICvi765XYmPXLlmtRWGqeUqNDRUDodDBw8e9Nh+8OBBhYeH5/o74eHh+e7v+t+DBw+qWrVqHvu0bNky18cMCgpSUFBQju0BAQF5FpqSVlCWO1vW1B3X1nBfv6SgJbIBAAAAX+E658qMa2YWpQ+YulpgYGCgWrdureTkZPc2p9Op5ORkRUZG5vo7kZGRHvtL56bMufavW7euwsPDPfZJT0/X+vXr83xMf+Gw2xRZv7J6tKyhUVEN9dVTt+udYTfopb4t9XjUNQovH+yxf3j5ID0e1TDP+y/+3Bb1NsBnAt7Adw0AXNlcX/tx3ZpY/npXpk8LjI2N1eDBg9WmTRu1bdtWM2bM0MmTJzV06FBJ0qBBg1SjRg1NmTJFkjRq1Ch16NBB06ZNU9euXbVkyRJt3LhRc+fOlSTZbDY99thjeuaZZ9SwYUPVrVtXEyZMUPXq1dWzZ0+zDtMUrrLlMuL2Bu6RrdwuyHbx/bleUO7nw/rki/WKvrldnheUS/ouVQlb9+voyUz3Y5t9fQSuHVF8ty+8CGluFyV0fWYq1rpGSzfu8xhNNTs77/Ol375QcXymereuqWO/f5/vd01eo/S+dKxm/nktzGvnT8d+Jb/vHGvety9kdhaOPe/b4SZf56ooTC9Xffr00eHDhzVx4kSlpqaqZcuWSkxMdC9IsXfvXtntfw2wtW/fXosXL9b48eM1btw4NWzYUAkJCWrWrJl7nzFjxujkyZN68MEHdezYMd10001KTExUcHBwjue/klxctgpz/8W329WtpD92GmpXt5ICStlz3T+yfmX9o2uTAovaZV/FvBBFr0Svqn4+T9SNbeUoVapEr3Zv5mtT0FXTXZ+ZLrfV18ioRs
X+uSip16YwV7e30vvkzTzeOPaCPlPO7LNauXJ3vt81LoX5P4Yu530+m31WyV9t8Ln3qTB/XvN77QrzPvvqZ5g/31W8eqy++tr4259vb3/GrfI+FebfGpZiIIe0tDRDkpGWlmZ2FCMzM9NISEgwMjMzzY5iGIa18lgpi2FYKw9Z8malPFbKYhjWykOWvFkpj5WyGIa18lgpi2FYKw9Z8malPFbKYraidANTz7kCAAAAAH9BuQIAAAAAL6BcAQAAAIAXUK4AAAAAwAsoVwAAAADgBZQrAAAAAPACyhUAAAAAeAHlCgAAAAC8gHIFAAAAAF5AuQIAAAAAL6BcAQAAAIAXUK4AAAAAwAsoVwAAAADgBaXMDmBFhmFIktLT001OImVlZenUqVNKT09XQECA2XEslcdKWayWhyy+kcdKWayWhyy+kcdKWayWx0pZrJaHLL6Rx0pZzObqBK6OkB/KVS6OHz8uSapVq5bJSQAAAABYwfHjx1WhQoV897EZhalgVxin06n9+/erXLlystlspmZJT09XrVq19Ntvv6l8+fKmZrFaHitlsVoesvhGHitlsVoesvhGHitlsVoeK2WxWh6y+EYeK2Uxm2EYOn78uKpXry67Pf+zqhi5yoXdblfNmjXNjuGhfPnylvpgWymPlbJI1spDlrxZKY+VskjWykOWvFkpj5WySNbKY6UskrXykCVvVspjpSxmKmjEyoUFLQAAAADACyhXAAAAAOAFlCuLCwoKUlxcnIKCgsyOIslaeayURbJWHrLkzUp5rJRFslYesuTNSnmslEWyVh4rZZGslYcsebNSHitl8SUsaAEAAAAAXsDIFQAAAAB4AeUKAAAAALyAcgUAAAAAXkC5AgAAAAAvoFxZ1Oeff65u3bqpevXqstlsSkhIsMxzG4ahiRMnqlq1aipdurSioqL0ww8/mJbn4MGDGjJkiKpXr66QkBB17ty52PJMmTJF119/vcqVK6eqVauqZ8+e2r17t8c+qampGjhwoMLDw1WmTBm1atVK//nPf7yeZfbs2br22mvdF/eLjIzUxx9/LEn65ZdfZLPZcv1ZtmyZ17Nc7Pnnn5fNZtNjjz3msT0lJUW33367ypQpo/Lly+uWW27R6dOnvf78kyZNynHcERER7vtvvfXWHPcPHz7c6zlc9u3bp/vuu0+VK1dW6dKl1bx5c23cuDHXfYcPHy6bzaYZM2YUS5Y6derk+rl45JFHJEkPPfSQ6tevr9KlS6tKlSrq0aOHdu3aVSxZsrOzNWHCBNWtW1elS5dW/fr1NXnyZLnWWcrKytKTTz6p5s2bq0yZMqpevboGDRqk/fv3F0ue48eP67HHHlPt2rVVunRptW/fXv/73//c9y9fvlzR0dGqXLmybDabtm7d6rXn9sb37tGjRzVgwACVL19eV111lR544AGdOHHCtDybN29Wp06ddNVVV6ly5cp68MEHLylPQVkK875483s5vzyF+cyuWbMmz+/nCz9vl5tFOvddGBERoTJlyqhixYqKiorS+vXrczzORx99pHbt2ql06dKqWLGievbsWaQchclyoby+53L7fnr++eeLnKUweYYMGZLjuTp37pzrY2VkZKhly5aX/Oe+MK/Nzp071b17d1WoUEFlypTR9ddfr71797rv9+Z3c0F58vp8/vOf/5R07t8YDzzwgMd3d1xcnDIzMy8pj7+hXFnUyZMn1aJFC7366quWe+4XXnhBL7/8subMmaP169erTJkyiomJ0ZkzZ0o8j2EY6tmzp37++Wf997//1ZYtW1S7dm1FRUXp5MmTXs+ydu1aPfLII/r666+VlJSkrKwsRUdHezzXoEGDtHv3br3//vvavn277r77bvXu3VtbtmzxapaaNWvq+eef16ZNm7Rx40bdfvvt6tGjh7799lvVqlVLBw4c8PiJj49X2bJldccdd3g1x8X+97//6bXXXtO1117rsT0lJUWdO3dWdHS0NmzYoP/9738aMWKE7Pbi+R
pq2rSpx/F/+eWXHvcPGzbM4/4XXnihWHL8+eefuvHGGxUQEKCPP/5Y3333naZNm6aKFSvm2HfFihX6+uuvVb169WLJIp17fy487qSkJElSr169JEmtW7fWggULtHPnTq1atUqGYSg6OlrZ2dlezzJ16lTNnj1bM2fO1M6dOzV16lS98MILeuWVVyRJp06d0ubNmzVhwgRt3rxZy5cv1+7du9W9e3evZ5Gk//u//1NSUpLeeustbd++XdHR0YqKitK+ffsknfsuuummmzR16lSvP7c3vncHDBigb7/9VklJSfrwww/1+eef68EHHzQlz/79+xUVFaUGDRpo/fr1SkxM1LfffqshQ4Z4PUth3hdvfi/nl6cwn9n27dvn+H7+v//7P9WtW1dt2rTxWhZJuuaaazRz5kxt375dX375perUqaPo6GgdPnzYvc9//vMfDRw4UEOHDtU333yjr776Sv379y9SjsJkcSnoe+7pp5/2eG0effTRImcpbJ7OnTt7PNc777yT635jxoy5rO/lgrL89NNPuummmxQREaE1a9Zo27ZtmjBhgoKDg937ePO7uaA8F38+58+fL5vNpnvuuUeStGvXLjmdTr322mv69ttv9eKLL2rOnDkaN25ckbP4JQOWJ8lYsWKFJZ7b6XQa4eHhxj//+U/3tmPHjhlBQUHGO++8U+J5du/ebUgyduzY4d6WnZ1tVKlSxXj99deLPc+hQ4cMScbatWvd28qUKWO8+eabHvtVqlSpRPJUrFjR+Pe//53rfS1btjTuv//+Yn3+48ePGw0bNjSSkpKMDh06GKNGjXLf165dO2P8+PHF+vwucXFxRosWLfK8/+JsxenJJ580brrppgL3+/33340aNWoYO3bsMGrXrm28+OKLxR/OMIxRo0YZ9evXN5xOZ673f/PNN4Yk48cff/T6c3ft2jXHZ/Luu+82BgwYkOfvbNiwwZBk/Prrr17NcurUKcPhcBgffvihx/ZWrVoZ//jHPzy27dmzx5BkbNmyxasZXC7le/e7774zJBn/+9//3Pt8/PHHhs1mM/bt21fieV577TWjatWqRnZ2tnufbdu2GZKMH374wWtZLpTf+1Jc38uF+fu5oM9sZmamUaVKFePpp58u9ixpaWmGJOPTTz81DMMwsrKyjBo1auT594a3sxT0PVdc33255Rk8eLDRo0ePAn935cqVRkREhPHtt9965c99bln69Olj3HfffUV6HG99Nxfmc9OjRw/j9ttvz3efF154wahbt+5lZfEXjFyhSPbs2aPU1FRFRUW5t1WoUEHt2rVTSkpKiefJyMiQJI//d8dutysoKCjHSEVxSEtLkyRVqlTJva19+/ZaunSpjh49KqfTqSVLlujMmTO69dZbiy1Hdna2lixZopMnTyoyMjLH/Zs2bdLWrVv1wAMPFFsGSXrkkUfUtWtXj8+HJB06dEjr169X1apV1b59e4WFhalDhw7F+h798MMPql69uurVq6cBAwZ4TK+QpLfffluhoaFq1qyZxo4dq1OnThVLjvfff19t2rRRr169VLVqVV133XV6/fXXPfZxOp0aOHCgRo8eraZNmxZLjtxkZmZq0aJFuv/++2Wz2XLcf/LkSS1YsEB169ZVrVq1vP787du3V3Jysr7//ntJ0jfffKMvv/wy39HVtLQ02Ww2XXXVVV7NcvbsWWVnZ3t8l0hS6dKlS+S7JD+F+d5NSUnRVVdd5THyERUVJbvdnus0sOLOk5GRocDAQI+R6dKlS0uSKa+nGd/LLgV9Zt9//3398ccfGjp0aLHmyMzM1Ny5c1WhQgW1aNFC0rmpm/v27ZPdbtd1112natWq6Y477tCOHTu8/vyF/Z57/vnnVblyZV133XX65z//qbNnz3o9i8uaNWtUtWpVNWrUSA8//LD++OMPj/sPHjyoYcOG6a233lJISEixZHA6nfroo490zTXXKCYmRlWrVlW7du3ynVZZ3N/NFzp48KA++uijAv/9kJ
aW5vFvoSsZ5QpFkpqaKkkKCwvz2B4WFua+ryRFRETo6quv1tixY/Xnn38qMzNTU6dO1e+//64DBw4U63M7nU499thjuvHGG9WsWTP39nfffVdZWVmqXLmygoKC9NBDD2nFihVq0KCB1zNs375dZcuWVVBQkIYPH64VK1aoSZMmOfabN2+eGjdurPbt23s9g8uSJUu0efNmTZkyJcd9P//8s6Rz8/+HDRumxMREtWrVSh07diyW8+PatWunN954Q4mJiZo9e7b27Nmjm2++WcePH5ck9e/fX4sWLdLq1as1duxYvfXWW7rvvvu8nkM6d+yzZ89Ww4YNtWrVKj388MMaOXKkFi5c6N5n6tSpKlWqlEaOHFksGfKSkJCgY8eO5ZiqNWvWLJUtW1Zly5bVxx9/rKSkJAUGBnr9+Z966in17dtXERERCggI0HXXXafHHntMAwYMyHX/M2fO6Mknn1S/fv1Uvnx5r2YpV66cIiMjNXnyZO3fv1/Z2dlatGiRUlJSiv27pCCF+d5NTU1V1apVPe4vVaqUKlWq5PXv5sLkuf3225Wamqp//vOfyszM1J9//qmnnnpKkkx5PUvye/lChfnMzps3TzExMapZs2axZPjwww9VtmxZBQcH68UXX1RSUpJCQ0MleX43jx8/Xh9++KEqVqyoW2+9VUePHvVqjsJ8z40cOVJLlizR6tWr9dBDD+m5557TmDFjvJrDpXPnznrzzTeVnJysqVOnau3atbrjjjvc0+wMw9CQIUM0fPjwIk/XLIpDhw7pxIkTev7559W5c2d98sknuuuuu3T33Xdr7dq1HvuW1HfzhRYuXKhy5crp7rvvznOfH3/8Ua+88ooeeuihYs3iM8weOkPBZKFpgV999ZUhydi/f7/Hfr169TJ69+5d4nkMwzA2btxotGjRwpBkOBwOIyYmxrjjjjuMzp07F2uW4cOHG7Vr1zZ+++03j+0jRoww2rZta3z66afG1q1bjUmTJhkVKlQwtm3b5vUMGRkZxg8//GBs3LjReOqpp4zQ0FDj22+/9djn1KlTRoUKFYx//etfXn9+l7179xpVq1Y1vvnmG/e2C6feuT43Y8eO9fi95s2bG0899VSx5XL5888/jfLly+c59SU5ObnYpr4FBAQYkZGRHtseffRR44YbbjAM49znNywszGPqVklNC4yOjjbuvPPOHNuPHTtmfP/998batWuNbt26Ga1atTJOnz7t9ed/5513jJo1axrvvPOOsW3bNuPNN980KlWqZLzxxhs59s3MzDS6detmXHfddUZaWprXsxiGYfz444/GLbfc4v4uuf76640BAwYYERERHvuV9LTAwnzvPvvss8Y111yT47GqVKlizJo1q8TzGIZhvP3220ZYWJjhcDiMwMBA44knnjDCwsKM559/3mtZLpTf+1Jc38v55SnMZ/a3334z7Ha78d57711WjvyynDhxwvjhhx+MlJQU4/777zfq1KljHDx40DCMc++RJOO1115z73/mzBkjNDTUmDNnjteyXOr33Lx584xSpUoZZ86cueQsueXJzU8//eQxZfKll14ybrzxRuPs2bOGYXjvz/3FWfbt22dIMvr16+exX7du3Yy+fft6bCuO7+aCXptGjRoZI0aMyPP+33//3ahfv77xwAMPXFYOf0K58gFWKleuL5+Lv1xuueUWY+TIkSWe50LHjh0zDh06ZBiGYbRt29b429/+Vmw5HnnkEaNmzZrGzz//7LH9xx9/zHEOmGEYRseOHY2HHnqo2PJc+DwPPvigx7Y333zTCAgIcL82xWHFihXuf5C6fiQZNpvNcDgc7tflrbfe8vi93r17G/379y+2XBdq06ZNnkXuxIkThiQjMTHR68979dVX5/hLZ9asWUb16tUNwzCMF1980f06Xfja2e12o3bt2l7P4/LLL78YdrvdSEhIyHe/jIwMIyQkxFi8eLHXM9SsWdOYOXOmx7bJkycbjRo18tiWmZlp9OzZ07j22m
uNI0eOeD3HxU6cOOEuDr179za6dOnicX9Jl6vCfO/OmzfPuOqqqzzuz8rKMhwOh7F8+fISz3Oh1NRU4/jx48aJEycMu91uvPvuu17LcqG83pfi/F7OK09hP7NPP/20UaVKFSMzM/OycuSX5WINGjQwnnvuOcMwDOOzzz4zJBlffPGFxz5t27Y1xo0b57Usl/o9t2PHDkOSsWvXrkvOkluevFxYKnv06GHY7fYcmR0OhzFo0CCvZcnIyDBKlSplTJ482WO/MWPGGO3bt8/zcbz13Zzfa/P5558bkoytW7fmev++ffuMhg0bGgMHDvQ4v/JKx7RAFEndunUVHh6u5ORk97b09HStX78+13N9SlKFChVUpUoV/fDDD9q4caN69Ojh9ecwDEMjRozQihUr9Nlnn6lu3boe97vO27l4BTyHwyGn0+n1PBdzOp3u89Bc5s2bp+7du6tKlSrF9rwdO3bU9u3btXXrVvdPmzZtNGDAAG3dulX16tVT9erVcyxb//3336t27drFlsvlxIkT+umnn1StWrVc73ctrZvX/ZfjxhtvzPe4Bw4cqG3btnm8dtWrV9fo0aO1atUqr+dxWbBggapWraquXbvmu59x7v+Ey/G58oZTp04V+GclKytLvXv31g8//KBPP/1UlStX9nqOi5UpU0bVqlXTn3/+qVWrVhXLd0lRFOZ7NzIyUseOHdOmTZvc+3z22WdyOp1q165diee5UFhYmMqWLaulS5cqODhYnTp18mqegpT093JhP7OGYWjBggUaNGiQAgICvJ4jLxf+PdG6dWsFBQV5fEdlZWXpl19+8ep386V+z23dulV2uz3HlNfi8Pvvv+uPP/5w/z3w8ssv65tvvnHnXblypSRp6dKlevbZZ732vIGBgbr++uuL/PdjcX43u8ybN0+tW7d2n6N3oX379unWW291r2JYXCv/+qJSZgdA7k6cOKEff/zRfXvPnj3aunWrKlWqpKuvvtrU537sscf0zDPPqGHDhqpbt64mTJig6tWrX9J1MbyRZ9myZapSpYquvvpqbd++XaNGjVLPnj0VHR3t9SyPPPKIFi9erP/+978qV66c+/yCChUqqHTp0oqIiFCDBg300EMP6V//+pcqV66shIQE99LI3jR27Fjdcccduvrqq3X8+HEtXrxYa9as8fiL6scff9Tnn3/u/kuhuJQrV87jvDPp3D9QK1eu7N4+evRoxcXFqUWLFmrZsqUWLlyoXbt26b333vN6nieeeELdunVT7dq1tX//fsXFxcnhcKhfv3766aeftHjxYnXp0kWVK1fWtm3b9Pjjj+uWW27JsXy8Nzz++ONq3769nnvuOfXu3VsbNmzQ3LlzNXfuXElS5cqVc/zjKyAgQOHh4WrUqJHX80jn/nG1YMECDR48WKVK/fXXwM8//6ylS5cqOjpaVapU0e+//67nn39epUuXVpcuXbyeo1u3bnr22Wd19dVXq2nTptqyZYumT5+u+++/X9K5f+Tde++92rx5sz788ENlZ2e7/8xVqlTJ6+cauJY3btSokX788UeNHj1aERER7oUGjh49qr1797qvWeT6x1B4eLjCw8Mv67kv93u3cePG6ty5s4YNG6Y5c+YoKytLI0aMUN++fS9pCWlv/D0wc+ZMtW/fXmXLllVSUpJGjx6t559/vsiLkRSUpaD3xdvfy/nlqVatWqE/s5999pn27Nmj//u//ytyhsJkqVy5sp599ll1795d1apV05EjR/Tqq69q37597ksvlC9fXsOHD1dcXJxq1aql2rVru69l5NrHG1muvvrqAr/nUlJStH79et12220qV66cUlJS9Pjjj+u+++7L9dIVl5OnUqVKio+P1z333KPw8HD99NNPGjNmjBo0aKCYmBhJyvFvrbJly0qS6tevX+Tz4wp6bUaPHq0+ffrolltu0W233abExER98MEHWrNmjSTvfzcX5t+Y6enpWrZsmaZNm5bj913Fqnbt2vrXv/7lsbT/5X
4X+gUTR82Qj9WrVxuScvwMHjzY9Od2Op3GhAkTjLCwMCMoKMjo2LGjsXv3btPyvPTSS0bNmjWNgIAA4+qrrzbGjx9vZGRkFEuW3HJIMhYsWODe5/vvvzfuvvtuo2rVqkZISIhx7bXX5lgC2Bvuv/9+o3bt2kZgYKBRpUoVo2PHjsYnn3zisc/YsWONWrVqmTJcn9ty51OmTDFq1qxphISEGJGRkTmmonhLnz59jGrVqhmBgYFGjRo1jD59+rjPp9q7d69xyy23GJUqVTKCgoKMBg0aGKNHjy6283gMwzA++OADo1mzZkZQUJARERFhzJ07N9/9i/ucq1WrVhmScvy53bdvn3HHHXcYVatWNQICAoyaNWsa/fv3v+wpOXlJT083Ro0aZVx99dVGcHCwUa9ePeMf//iH+8+va5pXbj+rV6/2ep6lS5ca9erVMwIDA43w8HDjkUceMY4dO+a+f8GCBblmiYuLu+zn9sb37h9//GH069fPKFu2rFG+fHlj6NChxvHjx03LM3DgQKNSpUpGYGDgZX0PFpSlMO+LN7+X88tTlM9sv3798p32dblZTp8+bdx1111G9erVjcDAQKNatWpG9+7djQ0bNng8RmZmpvH3v//dqFq1qlGuXDkjKioqxxTKy82Sm4u/5zZt2mS0a9fOqFChghEcHGw0btzYeO655y75fKv88pw6dcqIjo42qlSpYgQEBBi1a9c2hg0bZqSmpub5eJczHbgwr828efOMBg0aGMHBwUaLFi08pmx7+7u5MHlee+01o3Tp0h7fgS55/ZmjVpxjMwzDKLiCAQAAAADywwRJAAAAAPACyhUAAAAAeAHlCgAAAAC8gHIFAAAAAF5AuQIAAAAAL6BcAQAAAIAXUK4AAAAAwAsoVwAAAADgBZQrAAC8zGazKSEhwewYAIASRrkCAPiVIUOGyGaz5fjp3Lmz2dEAAH6ulNkBAADwts6dO2vBggUe24KCgkxKAwC4UjByBQDwO0FBQQoPD/f4qVixoqRzU/Zmz56tO+64Q6VLl1a9evX03nvvefz+9u3bdfvtt6t06dKqXLmyHnzwQZ04ccJjn/nz56tp06YKCgpStWrVNGLECI/7jxw5orvuukshISFq2LCh3n///eI9aACA6ShXAIArzoQJE3TPPffom2++0YABA9S3b1/t3LlTknTy5EnFxMSoYsWK+t///qdly5bp008/9ShPs2fP1iOPPKIHH3xQ27dv1/vvv68GDRp4PEd8fLx69+6tbdu2qUuXLhowYICOHj1aoscJAChZNsMwDLNDAADgLUOGDNGiRYsUHBzssX3cuHEaN26cbDabhg8frtmzZ7vvu+GGG9SqVSvNmjVLr7/+up588kn99ttvKlOmjCRp5cqV6tatm/bv36+wsDDVqFFDQ4cO1TPPPJNrBpvNpvHjx2vy5MmSzhW2smXL6uOPP+bcLwDwY5xzBQDwO7fddptHeZKkSpUquf87MjLS477IyEht3bpVkrRz5061aNHCXawk6cYbb5TT6dTu3btls9m0f/9+dezYMd8M1157rfu/y5Qpo/Lly+vQoUOXekgAAB9AuQIA+J0yZcrkmKbnLaVLly7UfgEBAR63bTabnE5ncUQCAFgE51wBAK44X3/9dY7bjRs3liQ1btxY33zzjU6ePOm+/6uvvpLdblejRo1Urlw51alTR8nJySWaGQBgfYxcAQD8TkZGhlJTUz22lSpVSqGhoZKkZcuWqU2bNrrpppv09ttva8OGDZo3b54kacCAAYqLi9PgwYM1adIkHT58WI8++qgGDhyosLAwSdKkSZM0fPhwVa1aVXfccYeOHz+ur776So8++mjJHigAwFIoVwAAv5OYmKhq1ap5bGvUqJF27dol6dxKfkuWLNHf/vY3VatWTe+8846aNGkiSQoJCdGqVas0atQoXX/99QoJCdE999yj6dOnux9r8ODBOnPmjF588UU98cQTCg0N1b333ltyBwgAsCRWCw
QAXFFsNptWrFihnj17mh0FAOBnOOcKAAAAALyAcgUAAAAAXsA5VwCAKwqz4QEAxYWRKwAAAADwAsoVAAAAAHgB5QoAAAAAvIByBQAAAABeQLkCAAAAAC+gXAEAAACAF1CuAAAAAMALKFcAAAAA4AX/D0Fg17dhZeDMAAAAAElFTkSuQmCC",
|
| 270 |
+
"text/plain": [
|
| 271 |
+
"<Figure size 1000x600 with 1 Axes>"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
"metadata": {},
|
| 275 |
+
"output_type": "display_data"
|
| 276 |
+
}
|
| 277 |
+
],
|
| 278 |
+
"source": [
|
| 279 |
+
"import matplotlib.pyplot as plt\n",
|
| 280 |
+
"\n",
|
| 281 |
+
"# Merge all the evaluation loss lists\n",
|
| 282 |
+
"eval_loss_lists = [\n",
|
| 283 |
+
" [\n",
|
| 284 |
+
" 0.07517127692699432,\n",
|
| 285 |
+
" 0.07137121260166168,\n",
|
| 286 |
+
" 0.06598775833845139,\n",
|
| 287 |
+
" 0.0005441228277049959,\n",
|
| 288 |
+
" 0.0002996980620082468,\n",
|
| 289 |
+
" 0.00021371280308812857,\n",
|
| 290 |
+
" 0.00028233605553396046,\n",
|
| 291 |
+
" 9.069988300325349e-05,\n",
|
| 292 |
+
" 7.004399230936542e-05,\n",
|
| 293 |
+
" 9.137028973782435e-05,\n",
|
| 294 |
+
" 5.340397547115572e-05,\n",
|
| 295 |
+
" 5.0301870942348614e-05\n",
|
| 296 |
+
" ],\n",
|
| 297 |
+
" [\n",
|
| 298 |
+
" 1.597152731847018e-05,\n",
|
| 299 |
+
" 1.162805529020261e-05,\n",
|
| 300 |
+
" 9.043936188390944e-06,\n",
|
| 301 |
+
" 1.379685454594437e-05,\n",
|
| 302 |
+
" 5.367660833144328e-06,\n",
|
| 303 |
+
" 4.6886875679774676e-06,\n",
|
| 304 |
+
" 4.490133960644016e-06,\n",
|
| 305 |
+
" 6.136821866675746e-06,\n",
|
| 306 |
+
" 3.3243470625166083e-06,\n",
|
| 307 |
+
" 2.348009729757905e-06,\n",
|
| 308 |
+
" 2.1804094103572425e-06,\n",
|
| 309 |
+
" 1.958705070137512e-06\n",
|
| 310 |
+
" ],\n",
|
| 311 |
+
" [\n",
|
| 312 |
+
" 3.93469099435606e-06,\n",
|
| 313 |
+
" 1.65619246672577e-06,\n",
|
| 314 |
+
" 1.1269650030953926e-06,\n",
|
| 315 |
+
" 8.881219173417776e-07,\n",
|
| 316 |
+
" 1.3077693665763945e-06,\n",
|
| 317 |
+
" 7.212336186057655e-07,\n",
|
| 318 |
+
" 6.988730092416517e-07,\n",
|
| 319 |
+
" 5.00343162457284e-07,\n",
|
| 320 |
+
" 4.1343139400851214e-07,\n",
|
| 321 |
+
" 5.06081335061026e-07,\n",
|
| 322 |
+
" 7.039822662591178e-07,\n",
|
| 323 |
+
" 5.087575800644117e-07\n",
|
| 324 |
+
" ],\n",
|
| 325 |
+
" [\n",
|
| 326 |
+
" 5.1233128033345565e-06,\n",
|
| 327 |
+
" 1.3323343637239304e-06,\n",
|
| 328 |
+
" 1.1789074960688595e-06,\n",
|
| 329 |
+
" 1.0221098136753426e-06,\n",
|
| 330 |
+
" 1.4271246300268103e-06,\n",
|
| 331 |
+
" 1.0917949566646712e-06,\n",
|
| 332 |
+
" 1.8720394336924073e-06,\n",
|
| 333 |
+
" 0.00015229727432597429,\n",
|
| 334 |
+
" 0.00016713247168809175,\n",
|
| 335 |
+
" 7.280236604856327e-05,\n",
|
| 336 |
+
" 5.6143608162528835e-06,\n",
|
| 337 |
+
" 1.2813707144232467e-06\n",
|
| 338 |
+
" ],\n",
|
| 339 |
+
" [\n",
|
| 340 |
+
" 1.7742066802384215e-06,\n",
|
| 341 |
+
" 3.1642618978366954e-06,\n",
|
| 342 |
+
" 2.774180939013604e-05,\n",
|
| 343 |
+
" 7.504659606638597e-06,\n",
|
| 344 |
+
" 1.0794157105920021e-06,\n",
|
| 345 |
+
" 8.346623303623346e-07,\n",
|
| 346 |
+
" 1.572396286064759e-06,\n",
|
| 347 |
+
" 4.874376031693828e-07,\n",
|
| 348 |
+
" 6.269995651564386e-07,\n",
|
| 349 |
+
" 5.949763703938515e-07,\n",
|
| 350 |
+
" 5.836409968651424e-07,\n",
|
| 351 |
+
" 5.382337917581026e-07\n",
|
| 352 |
+
" ],\n",
|
| 353 |
+
" [\n",
|
| 354 |
+
" 1.3506955838238355e-05,\n",
|
| 355 |
+
" 2.3305697141040582e-06,\n",
|
| 356 |
+
" 2.193627324231784e-06,\n",
|
| 357 |
+
" 3.027681714229402e-07,\n",
|
| 358 |
+
" 4.6904440864636854e-07,\n",
|
| 359 |
+
" 4.6231170358623785e-07,\n",
|
| 360 |
+
" 2.520739883493661e-07,\n",
|
| 361 |
+
" 2.040175957063184e-07,\n",
|
| 362 |
+
" 1.8624521658239246e-07,\n",
|
| 363 |
+
" 4.635896289073571e-07,\n",
|
| 364 |
+
" 2.6239982275910734e-07,\n",
|
| 365 |
+
" 2.4372931761718064e-07\n",
|
| 366 |
+
" ],\n",
|
| 367 |
+
" [\n",
|
| 368 |
+
" 5.271021564112743e-06,\n",
|
| 369 |
+
" 3.550181190803414e-06,\n",
|
| 370 |
+
" 2.5201459266099846e-06,\n",
|
| 371 |
+
" 2.8312820177234244e-06,\n",
|
| 372 |
+
" 1.4717104477313114e-06,\n",
|
| 373 |
+
" 2.2729768716089893e-06,\n",
|
| 374 |
+
" 1.030095177156909e-06,\n",
|
| 375 |
+
" 1.0983015954479924e-06,\n",
|
| 376 |
+
" 8.350090752173855e-07,\n",
|
| 377 |
+
" 4.235817687003873e-05,\n",
|
| 378 |
+
" 0.00017692078836262226,\n",
|
| 379 |
+
" 5.840817902935669e-05\n",
|
| 380 |
+
" ],\n",
|
| 381 |
+
" [\n",
|
| 382 |
+
" 1.2606010386662092e-06,\n",
|
| 383 |
+
" 7.131714937713696e-06,\n",
|
| 384 |
+
" 8.305702976940665e-06,\n",
|
| 385 |
+
" 6.520267561427318e-07,\n",
|
| 386 |
+
" 1.0400606953453462e-07,\n",
|
| 387 |
+
" 1.2373440938517888e-07,\n",
|
| 388 |
+
" 1.2282114880690642e-07,\n",
|
| 389 |
+
" 1.4778217405364558e-07,\n",
|
| 390 |
+
" 1.125305075788674e-07,\n",
|
| 391 |
+
" 4.522570762333089e-08,\n",
|
| 392 |
+
" 2.48692485911306e-05,\n",
|
| 393 |
+
" 5.199101238417825e-08\n",
|
| 394 |
+
" ],\n",
|
| 395 |
+
" [\n",
|
| 396 |
+
" 1.329818132944638e-06,\n",
|
| 397 |
+
" 9.433363743482914e-07,\n",
|
| 398 |
+
" 8.183121735783061e-07,\n",
|
| 399 |
+
" 1.0200094493484357e-06,\n",
|
| 400 |
+
" 7.936826023069443e-07,\n",
|
| 401 |
+
" 7.760887115182413e-07,\n",
|
| 402 |
+
" 2.45380675778506e-07,\n",
|
| 403 |
+
" 0.0001625938602956012,\n",
|
| 404 |
+
" 1.0732967581361663e-07,\n",
|
| 405 |
+
" 1.0528655138841714e-06,\n",
|
| 406 |
+
" 9.632424280425766e-07,\n",
|
| 407 |
+
" 7.961476740092621e-07\n",
|
| 408 |
+
" ],\n",
|
| 409 |
+
" [\n",
|
| 410 |
+
" 4.5500939904741244e-07,\n",
|
| 411 |
+
" 7.533798793701862e-07,\n",
|
| 412 |
+
" 4.7130234293035755e-07,\n",
|
| 413 |
+
" 7.465733347089554e-07,\n",
|
| 414 |
+
" 9.549980859446805e-07,\n",
|
| 415 |
+
" 6.432795771615929e-07,\n",
|
| 416 |
+
" 6.765155831089942e-07,\n",
|
| 417 |
+
" 6.765155831089942e-07,\n",
|
| 418 |
+
" 5.451398692457587e-07,\n",
|
| 419 |
+
" 4.994994355911331e-07,\n",
|
| 420 |
+
" 5.466189918479358e-07,\n",
|
| 421 |
+
" 4.268927682460344e-07\n",
|
| 422 |
+
" ],\n",
|
| 423 |
+
" [\n",
|
| 424 |
+
" 2.63293713942403e-07,\n",
|
| 425 |
+
" 3.551216138930613e-07,\n",
|
| 426 |
+
" 2.3628319922863739e-07,\n",
|
| 427 |
+
" 9.180489541904535e-07,\n",
|
| 428 |
+
" 1.1080908279836876e-06,\n",
|
| 429 |
+
" 6.248191084523569e-07,\n",
|
| 430 |
+
" 8.346111712853599e-07,\n",
|
| 431 |
+
" 5.276984325064404e-07,\n",
|
| 432 |
+
" 3.681239491015731e-07,\n",
|
| 433 |
+
" 1.8970614235058747e-07,\n",
|
| 434 |
+
" 3.114948299298703e-07,\n",
|
| 435 |
+
" 2.9696289516323304e-07\n",
|
| 436 |
+
" ],\n",
|
| 437 |
+
" # [\n",
|
| 438 |
+
" # 2.38517332036281e-05,\n",
|
| 439 |
+
" # 3.9089650272217114e-07,\n",
|
| 440 |
+
" # 6.718229883517779e-08,\n",
|
| 441 |
+
" # 1.4773820566915674e-07,\n",
|
| 442 |
+
" # 5.8338137876035034e-08,\n",
|
| 443 |
+
" # 3.57102081238736e-08,\n",
|
| 444 |
+
" # 2.2298079329630127e-06,\n",
|
| 445 |
+
" # 3.583775196602801e-07,\n",
|
| 446 |
+
" # 9.418199908850511e-08,\n",
|
| 447 |
+
" # 1.338206288892252e-06,\n",
|
| 448 |
+
" # 3.194011810592201e-07,\n",
|
| 449 |
+
" # 2.245769792352803e-07\n",
|
| 450 |
+
" # ],\n",
|
| 451 |
+
" [\n",
|
| 452 |
+
" 2.3522443370893598e-06,\n",
|
| 453 |
+
" 1.1711344996001571e-06,\n",
|
| 454 |
+
" 1.1321773172312533e-06,\n",
|
| 455 |
+
" 5.756968448622501e-07,\n",
|
| 456 |
+
" 4.4675923049908306e-07,\n",
|
| 457 |
+
" 4.365276993212319e-07,\n",
|
| 458 |
+
" 5.525398591998965e-07,\n",
|
| 459 |
+
" 4.404951710057503e-07,\n",
|
| 460 |
+
" 4.4630780848819995e-07,\n",
|
| 461 |
+
" 4.764913796861947e-07,\n",
|
| 462 |
+
" 4.10373701242861e-07,\n",
|
| 463 |
+
" 3.762708331578324e-07\n",
|
| 464 |
+
" ],\n",
|
| 465 |
+
" [\n",
|
| 466 |
+
" 2.1882451051169483e-07,\n",
|
| 467 |
+
" 5.146034354197582e-08,\n",
|
| 468 |
+
" 3.1587944704369875e-08,\n",
|
| 469 |
+
" 1.122993165125763e-08,\n",
|
| 470 |
+
" 8.033423704034703e-09,\n",
|
| 471 |
+
" 7.330823059703562e-09,\n",
|
| 472 |
+
" 2.0332389993882316e-08,\n",
|
| 473 |
+
" 1.718821529550496e-08,\n",
|
| 474 |
+
" 1.5028433608677005e-08,\n",
|
| 475 |
+
" 3.9828059072988253e-08,\n",
|
| 476 |
+
" 2.8266715190738978e-08,\n",
|
| 477 |
+
" 2.1497044144780375e-08\n",
|
| 478 |
+
" ],\n",
|
| 479 |
+
" [\n",
|
| 480 |
+
" 1.4871952558337398e-08,\n",
|
| 481 |
+
" 1.2490186662716951e-08,\n",
|
| 482 |
+
" 1.213749456496771e-08,\n",
|
| 483 |
+
" 1.159435214503901e-08,\n",
|
| 484 |
+
" 1.1296255486570317e-08,\n",
|
| 485 |
+
" 1.1153668211250078e-08,\n",
|
| 486 |
+
" 1.3103758966792611e-08,\n",
|
| 487 |
+
" 1.2461796927709656e-08,\n",
|
| 488 |
+
" 1.2030940688134706e-08,\n",
|
| 489 |
+
" 1.306745200935211e-08,\n",
|
| 490 |
+
" 1.029541429886649e-08,\n",
|
| 491 |
+
" 9.854548288501519e-09\n",
|
| 492 |
+
" ]\n",
|
| 493 |
+
"]\n",
|
| 494 |
+
"\n",
|
| 495 |
+
"# Flatten the nested list\n",
|
| 496 |
+
"merged_list = [item for sublist in eval_loss_lists for item in sublist]\n",
|
| 497 |
+
"\n",
|
| 498 |
+
"# Number of epochs\n",
|
| 499 |
+
"epochs = 20\n",
|
| 500 |
+
"\n",
|
| 501 |
+
"# Plotting the evaluation loss curve\n",
|
| 502 |
+
"plt.figure(figsize=(10, 6))\n",
|
| 503 |
+
"plt.plot(range(1, len(merged_list) + 1), merged_list, marker='o')\n",
|
| 504 |
+
"plt.title('Evaluation Loss Curve')\n",
|
| 505 |
+
"plt.xlabel('Epoch')\n",
|
| 506 |
+
"plt.ylabel('Evaluation Loss')\n",
|
| 507 |
+
"plt.xticks(range(1, len(merged_list) + 1, len(merged_list) // epochs))\n",
|
| 508 |
+
"plt.grid(True)\n",
|
| 509 |
+
"plt.show()\n"
|
| 510 |
+
]
|
| 511 |
+
},
|
| 512 |
+
{
|
| 513 |
+
"cell_type": "code",
|
| 514 |
+
"execution_count": 2,
|
| 515 |
+
"metadata": {},
|
| 516 |
+
"outputs": [
|
| 517 |
+
{
|
| 518 |
+
"data": {
|
| 519 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHHCAYAAACRAnNyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVVklEQVR4nO3deVxU5f4H8M+ZYWbYBmRfBBHcUHDfUjMzcUHTNE1TKpdbt1zS8npft7q31NK63X6Zt7pp3UrrkmmLWlYuuKRp7jsuuOEOIrJvA8w8vz9gJkdQGJjhzAyf9+s1L5kzhzPfL6B+OM9zniMJIQSIiIiI7JBC7gKIiIiI7oZBhYiIiOwWgwoRERHZLQYVIiIislsMKkRERGS3GFSIiIjIbjGoEBERkd1iUCEiIiK7xaBCREREdotBhaiBSJKEefPmyfLev/76KyRJwq+//irL+5P9kyQJM2bMkLsMoioYVKhRWb58OSRJuutjz549cpdYLx999BGWL18udxlmHnzwQcTGxspdhuzu9XP33HPPyV0ekd1ykbsAIjm8/vrriIyMrLK9ZcuWMlRjPR999BH8/f0xadIks+0PPPAAiouLoVar5SmMAAADBw7EU089VWV769atZaiGyDEwqFCjFB8fj27dusldRoNRKBRwdXWVu4xGr3Xr1njiiSfkLoPIoXDoh+gOZWVl8PX1xeTJk6u8lpeXB1dXV8yZMwcAUFpaitdeew1du3aFt7c3PDw80LdvX2zbtq3G95k0aRKaN29eZfu8efMgSZLZtmXLluGhhx5CYGAgNBoN2rVrhyVLlpjt07x5c5w4cQLbt283DSk8+OCDAO4+R+Xbb79F165d4ebmBn9/fzzxxBO4du1alTo9PT1x7do1jBw5Ep6enggICMCcOXOg1+tr7LO2PvroI8TExECj0SA0NBTTp09HTk6O2T5nz57F6NGjERwcDFdXV4SFheHxxx9Hbm6uaZ+kpCTcf//9aNKkCTw9PdGmTRu88sor93zv2NhY9O/fv8p2g8GApk2bYsyYMaZtK1euRNeuXaHVauHl5YX27dvj3//+d/2av41xqOzgwYPo3bs33NzcEBkZiaVLl1bZNyMjA3/6058QFBQEV1dXdOzYEV988UW1ffz73/9G+/bt4erqioCAAAwZMgQHDhyosu/atWsRGxsLjUaDmJgYbNiwwez1/Px8vPDCC2jevDk0Gg0CAwMxcOBAHDp0yGpfA6Lb8YwKNUq5ubnIzMw02yZJEvz8/KBSqTBq1CisXr0aH3/8sdlwydq1a6HT6fD4448DqAgun376KcaPH49nnnkG+fn5+OyzzzB48GDs27cPnTp1skq9S5YsQUxMDEaMGAEXFxesW7cO06ZNg8FgwPTp0wEAixcvxvPPPw9PT0/8/e9/BwAEBQXd9ZjLly/H5MmT0b17d7z11lu4ceMG/v3vf2PXrl04fPgwmjRpYtpXr9dj8ODB6NmzJ/7v//4PmzdvxrvvvosWLVpg6tSp9e5v3rx5mD9/PuLi4jB16lSkpKRgyZIl2L9/P3bt2gWVSoXS0lIMHjwYOp0Ozz//PIKDg3Ht2jX89NNPyMnJgbe3N06cOIGHH34YHTp0wOuvvw6NRoNz585h165d93z/cePGYd68eUhPT0dwcLBp+86dO3H9+nXT9zspKQnjx4/HgAED8PbbbwMATp06hV27dmHWrFk19llSUlLl5w4AvLy8zH7OsrOzMXToUIwdOxbjx4/HN998g6lTp0KtVmPKlCkAgOLiYjz44IM4d+4cZsyYgcjISHz77beYNGkScnJyzOr505/+hOXLlyM+Ph5PP/00ysvL8dtvv2HPnj1mZxZ37tyJ1atXY9q0adBqtXj//fcxevRoXL58GX5+fgCA5557Dt999x1mzJiBdu3a4datW9i5cydOnTqFLl261Pg1ILKYIGpEli1bJgBU+9BoNKb9Nm7cKACIdevWmX3+0KFDRV
RUlOl5eXm50Ol0ZvtkZ2eLoKAgMWXKFLPtAMTcuXNNzydOnCgiIiKq1Dh37lxx51/NoqKiKvsNHjzYrBYhhIiJiRH9+vWrsu+2bdsEALFt2zYhhBClpaUiMDBQxMbGiuLiYtN+P/30kwAgXnvtNbM6AYjXX3/d7JidO3cWXbt2rfJed+rXr5+IiYm56+sZGRlCrVaLQYMGCb1eb9r+4YcfCgDi888/F0IIcfjwYQFAfPvtt3c91nvvvScAiJs3b9ZY1+1SUlIEAPHBBx+YbZ82bZrw9PQ0ff1nzZolvLy8RHl5uUXHF0Lc9ecOgPj6669N+/Xr108AEO+++65pm06nE506dRKBgYGitLRUCCHE4sWLBQCRmJho2q+0tFT06tVLeHp6iry8PCGEEFu3bhUAxMyZM6vUZDAYzOpTq9Xi3Llzpm1Hjx6t8nXx9vYW06dPt7h/orri0A81Sv/5z3+QlJRk9li/fr3p9Yceegj+/v5YtWqVaVt2djaSkpIwbtw40zalUmn6TdhgMCArKwvl5eXo1q2bVU+Fu7m5mT42ng3q168fLly4YDbsUVsHDhxARkYGpk2bZjZ3ZdiwYYiOjsbPP/9c5XPuvDKlb9++uHDhgsXvfafNmzejtLQUL7zwAhSKP/5JeuaZZ+Dl5WWqxdvbGwCwceNGFBUVVXss41mgH374AQaDodY1tG7dGp06dTL7fuv1enz33XcYPny46evfpEkTFBYWIikpyaIejR555JEqP3dJSUlVhp1cXFzw7LPPmp6r1Wo8++yzyMjIwMGDBwEAv/zyC4KDgzF+/HjTfiqVCjNnzkRBQQG2b98OAPj+++8hSRLmzp1bpZ47hxjj4uLQokUL0/MOHTrAy8vL7PvcpEkT7N27F9evX6/T14DIUk4TVHbs2IHhw4cjNDQUkiRh7dq1Nn0/4zyC2x/R0dE2fU+ynh49eiAuLs7scft/Fi4uLhg9ejR++OEH6HQ6AMDq1atRVlZmFlQA4IsvvkCHDh3g6uoKPz8/BAQE4Oeff65TgLibXbt2IS4uDh4eHmjSpAkCAgJM8y7q8j6XLl0CALRp06bKa9HR0abXjYzzGm7n4+OD7Oxsi9+7trWo1WpERUWZXo+MjMTs2bPx6aefwt/fH4MHD8Z//vMfs/7HjRuHPn364Omnn0ZQUBAef/xxfPPNN7UKLePGjcOuXbtMc3R+/fVXZGRkmH2/p02bhtatWyM+Ph5hYWGYMmVKlTkc9xIWFlbl5y4uLq7KEF1oaCg8PDzMthmvDLp48SKAiq9bq1atzMIdALRt29b0OgCcP38eoaGh8PX1rbG+Zs2aVdl25/f5X//6F5KTkxEeHo4ePXpg3rx5VgmsRHfjNEGlsLAQHTt2xH/+858Ge8+YmBikpaWZHjt37myw9ybbe/zxx5Gfn2860/LNN98gOjoaHTt2NO2TmJiISZMmoUWLFvjss8+wYcMGJCUl4aGHHqrxP8c7f5s1unOC6vnz5zFgwABkZmZi0aJF+Pnnn5GUlIQXX3wRACw6c1BXSqXS5u9RG++++y6OHTuGV155BcXFxZg5cyZiYmJw9epVABVnnnbs2IHNmzfjySefxLFjxzBu3DgMHDiwxom/48aNgxAC3377LYCK77e3tzeGDBli2icwMBBHjhzBjz/+iBEjRmDbtm2Ij4/HxIkTbdd0A7rb91kIYfp47NixuHDhAj744AOEhobinXfeQUxMjNkZSSJrcpqgEh8fjwULFmDUqFHVvq7T6TBnzhw0bdoUHh4e6NmzZ71X6XRxcUFwcLDp4e/vX6/jkX154IEHEBISglWrViEzMxNbt26tcjblu+++Q1RUFFavXo0nn3wSgwcPRlxcHEpKSmo8vo+PT5WrWgBUOZuxbt066HQ6/Pjjj3j22WcxdOhQxMXFmQ0HGd0t/NwpIiICAJCSklLltZSUFNPrDeFutZSWliI1NbVKLe3bt8c//vEP7NixA7/99huuXbtmdkWMQq
HAgAEDsGjRIpw8eRILFy7E1q1ba7wSKzIyEj169MCqVatQXl6O1atXY+TIkdBoNGb7qdVqDB8+HB999BHOnz+PZ599Fl9++SXOnTtXny+DmevXr6OwsNBs25kzZwDAdKVYREQEzp49WyWonj592vQ6ALRo0QLXr19HVlaW1eoLCQnBtGnTsHbtWqSmpsLPzw8LFy602vGJbuc0QaUmM2bMwO7du7Fy5UocO3YMjz32GIYMGYKzZ8/W+Zhnz55FaGgooqKikJCQgMuXL1uxYpKbQqHAmDFjsG7dOvzvf/9DeXl5laBi/A309t849+7di927d9d4/BYtWiA3NxfHjh0zbUtLS8OaNWtqfI/c3FwsW7asyjE9PDyqDT936tatGwIDA7F06VLT0BYArF+/HqdOncKwYcNqPIa1xMXFQa1W4/333zfr8bPPPkNubq6plry8PJSXl5t9bvv27aFQKEw9VPefsfHKq9v7vJtx48Zhz549+Pzzz5GZmVnl+33r1i2z5wqFAh06dKj18WurvLwcH3/8sel5aWkpPv74YwQEBKBr164AgKFDhyI9Pd1sXk15eTk++OADeHp6ol+/fgCA0aNHQwiB+fPnV3mf27/etaHX66sMNQYGBiI0NNSq/RPdrlFcnnz58mUsW7YMly9fRmhoKABgzpw52LBhA5YtW4Y333zT4mP27NkTy5cvR5s2bZCWlob58+ejb9++SE5OhlartXYLZGXr1683/eZ5u969eyMqKsr0fNy4cfjggw8wd+5ctG/f3jT+b/Twww9j9erVGDVqFIYNG4bU1FQsXboU7dq1Q0FBwT1rePzxx/G3v/0No0aNwsyZM1FUVIQlS5agdevWZhNxBw0aZPot/tlnn0VBQQH++9//IjAwEGlpaWbH7Nq1K5YsWYIFCxagZcuWCAwMxEMPPVTlvVUqFd5++21MnjwZ/fr1w/jx402XJzdv3tw0rGQtN2/exIIFC6psj4yMREJCAl5++WXMnz8fQ4YMwYgRI5CSkoKPPvoI3bt3Ny2QtnXrVsyYMQOPPfYYWrdujfLycvzvf/+DUqnE6NGjAVSsOLxjxw4MGzYMERERyMjIwEcffYSwsDDcf//9NdY5duxYzJkzB3PmzIGvry/i4uLMXn/66aeRlZWFhx56CGFhYbh06RI++OADdOrUqcrPRnXOnDmDxMTEKtuDgoIwcOBA0/PQ0FC8/fbbuHjxIlq3bo1Vq1bhyJEj+OSTT6BSqQAAf/7zn/Hxxx9j0qRJOHjwIJo3b47vvvsOu3btwuLFi03/DvXv3x9PPvkk3n//fZw9exZDhgyBwWDAb7/9hv79+1t0f5/8/HyEhYVhzJgx6NixIzw9PbF582bs378f7777bq2PQ2QRGa84shkAYs2aNabnxksuPTw8zB4uLi5i7NixQgghTp06dc/LBwGIv/3tb3d9z+zsbOHl5SU+/fRTW7dH9XCvy5MBiGXLlpntbzAYRHh4uAAgFixYUOV4BoNBvPnmmyIiIkJoNBrRuXNn8dNPP1V76THuuDxZCCE2bdokYmNjhVqtFm3atBGJiYnVXp78448/ig4dOghXV1fRvHlz8fbbb4vPP/9cABCpqamm/dLT08WwYcOEVqsVAEyXKt95ebLRqlWrROfOnYVGoxG+vr4iISFBXL161WyfiRMnCg8Pjyq9V1dndYyX21b3GDBggGm/Dz/8UERHRwuVSiWCgoLE1KlTRXZ2tun1CxcuiClTpogWLVoIV1dX4evrK/r37y82b95s2mfLli3ikUceEaGhoUKtVovQ0FAxfvx4cebMmRrrNOrTp48AIJ5++ukqr3333Xdi0KBBIjAwUKjVatGsWTPx7LPPirS0tBqPe6+fu9svKTdezn3gwAHRq1cv4erqKiIiIsSHH35Y5Zg3btwQkydPFv7+/kKtVov27dtX+RkWouIy+nfeeUdER0cLtVotAgICRHx8vDh48KBZfdVddhwRESEmTpwohKi4TP
qvf/2r6Nixo9BqtcLDw0N07NhRfPTRRzX2T1RXkhAWnvtzAJIkYc2aNRg5ciQAYNWqVUhISMCJEyeqTBbz9PREcHAwSktLa5y5bryi4266d++OuLg4vPXWW/XugYgapwcffBCZmZlITk6WuxQiu9Aohn46d+4MvV6PjIwM9O3bt9p91Gp1vS4vLigowPnz5/Hkk0/W+RhERERkzmmCSkFBgdms+9TUVBw5cgS+vr5o3bo1EhIS8NRTT+Hdd99F586dcfPmTWzZsgUdOnSo08TBOXPmYPjw4YiIiMD169cxd+5cKJVKs8WXiIiIqH6cJqgcOHDAbMGu2bNnAwAmTpyI5cuXY9myZViwYAH+8pe/4Nq1a/D398d9992Hhx9+uE7vd/XqVYwfPx63bt1CQEAA7r//fuzZs+eeQ0NERERkGaeco0JERETOodGso0JERESOh0GFiIiI7JZDz1ExGAy4fv06tFptrZcOJyIiInkJIZCfn4/Q0NAqN9a8k0MHlevXryM8PFzuMoiIiKgOrly5grCwsHvu49BBxbhE9JUrV+Dl5WXVY5eVlWHTpk0YNGiQaclqZ8L+HJ+z9+js/QHO3yP7c3y26jEvLw/h4eG1uuWMQwcV43CPl5eXTYKKu7s7vLy8nPIHkP05Pmfv0dn7A5y/R/bn+GzdY22mbXAyLREREdktBhUiIiKyWwwqREREZLcYVIiIiMhuMagQERGR3WJQISIiIrvFoEJERER2i0GFiIiI7BaDChEREdktBpVq6A0Ce1OzcDBTwt7ULOgNQu6SiIiIGiWHXkLfFjYkp2H+upNIyy0BoMSXZw8gxNsVc4e3w5DYELnLIyIialR4RuU2G5LTMDXxUGVI+UN6bgmmJh7ChuQ0mSojIiJqnBhUKukNAvPXnUR1gzzGbfPXneQwEBERUQNiUKm0LzWrypmU2wkAabkl2Jea1XBFERERNXIMKpUy8u8eUuqyHxEREdUfg0qlQK2rVfcjIiKi+mNQqdQj0hch3q6Q7vK6BCDE2xU9In0bsiwiIqJGjUGlklIhYe7wdgBw17Ayd3g7KBV3e5WIiIisjUHlNkNiQ7DkiS4I9jYf3nFVKbDkiS5cR4WIiKiBccG3OwyJDcHAdsHYfS4Dq5L2Yt0VJcrKDegS4SN3aURERI0Oz6hUQ6mQ0DPSF3FhAp3DvaEXwLcHrspdFhERUaPDoFKD8d3DAQBf77sMAxd7IyIialAMKjWIjw2Ct5sKV7OLsePsTbnLISIialQYVGrgqlLi0S5NAQBf7b0sczVERESNC4NKLST0bAYA2Ho6A+n3WGafiIiIrItBpRZaBmrRI9IXeoPAqv1X5C6HiIio0WBQqSXjWZWV+y+jXG+QuRoiIqLGgUGllobEBsPXQ4203BL8msJJtURERA2BQaWWNC5KjOkaBgBYsY+TaomIiBoCg4oFxveoGP7ZlpKBq9lFMldDRETk/BhULBDp74E+Lf0gBDiploiIqAEwqFhoQo8IABVBpYyTaomIiGyKQcVCA9sFwd9TjYx8HbacuiF3OURERE6NQcVCahcFHutWcf8frlRLRERkWwwqdTC+ezNIEvDb2UxcvsVJtURERLbCoFIHzfzc0bdVAABeqkxERGRLDCp1NKHyUuXvDl5BaTkn1RIREdkCg0odDWgbiCAvDTILSrHpZLrc5RARETklBpU6UikVGGecVLuHwz9ERES2wKBSD+N6NINCAnZfuIULNwvkLoeIiMjpMKjUQ9MmbujfJhAA8DUn1RIREVkdg0o9TehZMan224NXUVKml7kaIiIi58KgUk8PtglEqLcrcorKsCGZk2qJiIisiUGlnpQKCY9XXqq8givVEhERWRWDihWM6x4OpULCvotZOHMjX+5yiIiInAaDihUEebliQHTFpFqeVSEiIrIeBhUrSbgvAgCw+tBVFJdyUi0REZE1MKhYSd+W/gj3dUNeSTl+OnZd7nKIiIicAoOKlSgUEh7vXjmplm
uqEBERWQWDihWN7RYOF4WEw5dzcPJ6ntzlEBEROTwGFSsK0GowOCYYALBi3yWZqyEiInJ8sgYVvV6PV199FZGRkXBzc0OLFi3wxhtvQAghZ1n1Ylypdu3h6yjUlctcDRERkWNzkfPN3377bSxZsgRffPEFYmJicODAAUyePBne3t6YOXOmnKXVWa8oP0T6eyA1sxDrjl43LQZHRERElpP1jMrvv/+ORx55BMOGDUPz5s0xZswYDBo0CPv27ZOzrHpRKCSM7xEOAPiKa6oQERHVi6xBpXfv3tiyZQvOnDkDADh69Ch27tyJ+Ph4OcuqtzFdw6FWKnD8Wi6OX82VuxwiIiKHJevQz0svvYS8vDxER0dDqVRCr9dj4cKFSEhIqHZ/nU4HnU5nep6XV3FlTVlZGcrKyqxam/F4dTmuVi1hcEwg1h1LR+KeVCx4JMaqtVlDffpzBM7eH+D8PTp7f4Dz98j+HJ+terTkeJKQcebqypUr8de//hXvvPMOYmJicOTIEbzwwgtYtGgRJk6cWGX/efPmYf78+VW2r1ixAu7u7g1Rcq2dywM+OOECtULgja56uMoaCYmIiOxHUVERJkyYgNzcXHh5ed1zX1mDSnh4OF566SVMnz7dtG3BggVITEzE6dOnq+xf3RmV8PBwZGZm1tiopcrKypCUlISBAwdCpVJZ/PlCCMR/8DvO3yzEvOFtkVA5b8Ve1Lc/e+fs/QHO36Oz9wc4f4/sz/HZqse8vDz4+/vXKqjI+nt+UVERFArzaTJKpRIGg6Ha/TUaDTQaTZXtKpXKZj8k9Tl2Qs8IvP7TSaw6cA0Te0dCkiQrV1d/tvza2QNn7w9w/h6dvT/A+Xtkf47P2j1acixZJ9MOHz4cCxcuxM8//4yLFy9izZo1WLRoEUaNGiVnWVYzuksYNC4KnErLw+ErOXKXQ0RE5HBkDSoffPABxowZg2nTpqFt27aYM2cOnn32WbzxxhtylmU13u4qDOsQAgBYwUuViYiILCZrUNFqtVi8eDEuXbqE4uJinD9/HgsWLIBarZazLKtK6BkBAPjp2HXkFjnvzHAiIiJb4L1+bKxLsyaIDtaipMyA1Yevyl0OERGRQ2FQsTFJkkz3/1mx97JD38eIiIiooTGoNICRnZvCTaXE2YwC7L+YLXc5REREDoNBpQF4uaowomMoAGDF3ksyV0NEROQ4GFQaiHH455fkdGQVlspcDRERkWNgUGkgHcK8EdvUC6XlBnx/kJNqiYiIaoNBpYFIkoQJPSouVf56HyfVEhER1QaDSgMa0SkUHmolLmQWYveFW3KXQ0REZPcYVBqQp8YFIzs3BQB8xZVqiYiIasSg0sCMk2o3nUhHZoGuhr2JiIgaNwaVBhYT6o2O4U1Qphf49gAn1RIREd0Lg4oMEirPqny97zIMBk6qJSIiuhsGFRkM7xAKrasLLmcVYee5TLnLISIislsMKjJwUyvxaOWk2hWcVEtERHRXDCoymdCzYk2VpFM3cCOvROZqiIiI7BODikzaBGvRLcIHeoPAN/uvyF0OERGRXWJQkZHxUuWV+69Az0m1REREVTCoyGho+xA0cVfhWk4xtp/JkLscIiIiu8OgIiNXlRKju4QB4KRaIiKi6jCoyGx8j4rhn62nM3A9p1jmaoiIiOwLg4rMWgZ64r4oXxhExVwVIiIi+gODih0wXqq8av9llOsNMldDRERkPxhU7MDgmCD4eqhxI0+Hrac5qZaIiMiIQcUOaFyUeKxbxaTarziploiIyIRBxU6M714xqXbH2Zu4klUkczVERET2gUHFTjT398D9Lf0hBLByP8+qEBERAQwqdiWhcqXaVfuvooyTaomIiBhU7ElcuyAEaDXILNAh6eQNucshIiKSHYOKHVEpFRjbjSvVEhERGTGo2JnHuzeDJAE7z2XiYmah3OUQERHJikHFzoT7uqNf6wAAwNf7eFaFiIgaNwYVOzSh8v4/3x68Cl25XuZqiIiI5MOgYoceig
5EsJcrsgpLsSE5Xe5yiIiIZMOgYodclAqM6x4OgJNqiYiocWNQsVOP9wiHQgL2pmbhXEaB3OUQERHJgkHFToV4u+Gh6CAAPKtCRESNF4OKHTOuVPv9oasoKeOkWiIianwYVOzYA60D0LSJG3KLy/DL8TS5yyEiImpwDCp2TKmQML5HxaTarzj8Q0REjRCDip0b2y0cLgoJBy9lIyU9X+5yiIiIGhSDip0L9HJFXFvjpNpLMldDRETUsBhUHEDCfRWTalcfuoai0nKZqyEiImo4DCoOoE8LfzTzdUe+rhw/HeWkWiIiajwYVByAQiFhfOX9f77ijQqJiKgRYVBxEI91C4NKKeHolRwkX8uVuxwiIqIGwaDiIPw9NRgcEwwAWMGzKkRE1EgwqDiQCZUr1f5w+BoKdJxUS0REzo9BxYH0ivJDlL8HCkv1+OHINbnLISIisjkGFQciSZLprMqKvZchhJC5IiIiIttiUHEwo7uEQe2iwInreTh2lZNqiYjIuTGoOBgfDzWGxlZMqv2KK9USEZGTY1BxQAn3RQAA1h1NQ15JmczVEBER2Q6DigPqFuGDVoGeKC7TY+1hTqolIiLnxaDigG6fVPvVHk6qJSIi58Wg4qAe7RwGV5UCKTfycehyttzlEBER2QSDioPydlfh4Q6hAICv9nKlWiIick4MKg7MOPzz07E05BSVylwNERGR9TGoOLDO4U3QNsQLpeUGfH+Ik2qJiMj5MKg4MPOVai9xUi0RETkdBhUHN7JTKNzVSpy/WYi9qVlyl0NERGRVDCoOTuuqwiOdKibVruCkWiIicjIMKk5gQo+KlWo3JKfjVoFO5mqIiIish0HFCbQP80b7pt4o1Rvw3cGrcpdDRERkNbIHlWvXruGJJ56An58f3Nzc0L59exw4cEDushxOQuWk2q/3XYbBwEm1RETkHGQNKtnZ2ejTpw9UKhXWr1+PkydP4t1334WPj4+cZTmk4R1D4alxwcVbRdh94Zbc5RAREVmFi5xv/vbbbyM8PBzLli0zbYuMjJSxIsfloXHByM6hSNxzGV/tvYQ+Lf3lLomIiKjeZA0qP/74IwYPHozHHnsM27dvR9OmTTFt2jQ888wz1e6v0+mg0/0xWTQvLw8AUFZWhrKyMqvWZjyetY9rS2O7NEXinsvYdOIGrmcVIECrueu+jtifJZy9P8D5e3T2/gDn75H9OT5b9WjJ8SRh4SphxcXFEELA3d0dAHDp0iWsWbMG7dq1w6BBgywq1NXVFQAwe/ZsPPbYY9i/fz9mzZqFpUuXYuLEiVX2nzdvHubPn19l+4oVK0z1NHbvHVfiYoGEh5vpMbAp56oQEZH9KSoqwoQJE5CbmwsvL6977mtxUBk0aBAeffRRPPfcc8jJyUF0dDRUKhUyMzOxaNEiTJ06tdbHUqvV6NatG37//XfTtpkzZ2L//v3YvXt3lf2rO6MSHh6OzMzMGhu1VFlZGZKSkjBw4ECoVCqrHtuWvj90DS+tOYGwJq7Y8mJfKBRStfs5an+15ez9Ac7fo7P3Bzh/j+zP8dmqx7y8PPj7+9cqqFg89HPo0CG89957AIDvvvsOQUFBOHz4ML7//nu89tprFgWVkJAQtGvXzmxb27Zt8f3331e7v0ajgUZTdThDpVLZ7IfElse2hUc6h+PN9Sm4mlOC3Rdz8GCbwHvu72j9WcrZ+wOcv0dn7w9w/h7Zn+Ozdo+WHMviq36Kioqg1WoBAJs2bcKjjz4KhUKB++67D5cuXbLoWH369EFKSorZtjNnziAiIsLSsqiSm1qJR7uEAeBKtURE5PgsDiotW7bE2rVrceXKFWzcuNE0LyUjI8Pi4ZcXX3wRe/bswZtvvolz585hxYoV+OSTTzB9+nRLy6LbGNdU2XI6A+m5JTJXQ0REVHcWB5XXXnsNc+bMQfPmzdGzZ0/06tULQMXZlc6dO1t0rO7du2PNmjX4+uuvERsbizfeeAOLFy9GQkKCpWXRbVoFadGjuS
/0BoFV+6/IXQ4REVGdWTxHZcyYMbj//vuRlpaGjh07mrYPGDAAo0aNsriAhx9+GA8//LDFn0f3NqFnM+y7mIWV+y9jev8WcFHKvggxERGRxer0v1dwcDA6d+4MhUKBvLw8rF27FlqtFtHR0dauj+poSGwwfNxVSMstwa8pN+Uuh4iIqE4sDipjx47Fhx9+CKBiTZVu3bph7Nix6NChw12v1qGG56pSYkzXykm1+ziploiIHJPFQWXHjh3o27cvAGDNmjUQQiAnJwfvv/8+FixYYPUCqe7G96iYVLstJQNXs4tkroaIiMhyFgeV3Nxc+Pr6AgA2bNiA0aNHw93dHcOGDcPZs2etXiDVXVSAJ3pF+UEIcFItERE5JIuDSnh4OHbv3o3CwkJs2LDBdHlydna2aUl8sh8J91WcVVm1/wrK9AaZqyEiIrKMxUHlhRdeQEJCAsLCwhAaGooHH3wQQMWQUPv27a1dH9XToHbB8PdUIyNfhy2nMuQuh4iIyCIWB5Vp06Zh9+7d+Pzzz7Fz504oFBWHiIqK4hwVO6R2UWBM13AAwFd7LVs5mIiISG4Wr6MCAN26dUO3bt0ghIAQApIkYdiwYdaujaxkQo9mWLr9PH47m4nLt4rQzI93miYiIsdQp3VUvvzyS7Rv3x5ubm5wc3NDhw4d8L///c/atZGVNPNzR99W/gCAr/fzUmUiInIcFgeVRYsWYerUqRg6dCi++eYbfPPNNxgyZAiee+45012Vyf4Y7//z7YErKC3npFoiInIMFg/9fPDBB1iyZAmeeuop07YRI0YgJiYG8+bNw4svvmjVAsk6BrQNQqBWg4x8HTadTMfgtgFyl0RERFQji8+opKWloXfv3lW29+7dG2lpaVYpiqxPpVRgXPeKSbUr9nL4h4iIHIPFQaVly5b45ptvqmxftWoVWrVqZZWiyDbGdQ+HJAG/n7+F1MxCucshIiKqkcVDP/Pnz8e4ceOwY8cO9OnTBwCwa9cubNmypdoAQ/YjzMcd/dsEYuvpDCzecg7+Ogl+qVno1TIQSoUkd3lERERVWBxURo8ejb179+K9997D2rVrAQBt27bFvn370LlzZ2vXR1bWJkiLracz8EvyDQBKfHn2AEK8XTF3eDsMiQ2RuzwiIiIzdbo8uWvXrkhMTMTBgwdx8OBBJCYmomnTpnjzzTetXR9Z0YbkNCzdfr7K9vTcEkxNPIQNyZxjRERE9qVOQaU6aWlpePXVV611OLIyvUFg/rqTENW8Ztw2f91J6A3V7UFERCQPqwUVsm/7UrOQllty19cFgLTcEuxLzWq4ooiIiGrAoNJIZOTfPaTUZT8iIqKGwKDSSARqXa26HxERUUOo9VU/s2fPvufrN2/erHcxZDs9In0R4u2K9NySauepSACCvV3RI9K3oUsjIiK6q1oHlcOHD9e4zwMPPFCvYsh2lAoJc4e3w9TEQ5CAasPK3OHtuJ4KERHZlVoHlW3bttmyDmoAQ2JDsOSJLpi/7qTZxFq1UsL74ztzHRUiIrI7Fi/4Ro5tSGwIBrYLxu5zGVi9dS9WX1SiVC/QJcJH7tKIiIiq4GTaRkipkNAz0hf9QgQ6NPUCAGw8cUPmqoiIiKpiUGnkBrULAgCuSktERHaJQaWRGxJTEVT2XMhCdmGpzNUQERGZY1Bp5CL83BEdrIXeIJB0ksM/RERkX+o0mTYnJwf79u1DRkYGDAaD2WtPPfWUVQqjhhMfG4LT6flYn5yGsd3D5S6HiIjIxOKgsm7dOiQkJKCgoABeXl6QpD/W3ZAkiUHFAcW3D8Z7m89g57lM5JWUwctVJXdJREREAOow9POXv/wFU6ZMQUFBAXJycpCdnW16ZGXxhnaOqFWgJ6ICPFCmF9h6KkPucoiIiEwsDirXrl3DzJkz4e7ubot6SAaSJCE+NhgAsJ5X/xARkR2xOKgMHjwYBw4csEUtJKP4ylVpt5+5iaLScpmrISIiqmDxHJVhw4bhr3/9K06ePIn27d
tDpTKfzzBixAirFUcNJybUC2E+briaXYxfU25iaHsup09ERPKzOKg888wzAIDXX3+9ymuSJEGv19e/KmpwxuGf//6WivXJ6QwqRERkFywe+jEYDHd9MKQ4NuNNCbeeuoGSMn4viYhIflzwjUw6hzdBkJcGhaV67DybKXc5REREdQsq27dvx/Dhw9GyZUu0bNkSI0aMwG+//Wbt2qiBKRQShsQYr/5Jl7kaIiKiOgSVxMRExMXFwd3dHTNnzsTMmTPh5uaGAQMGYMWKFbaokRqQcfhn86kbKNMbatibiIjItiyeTLtw4UL861//wosvvmjaNnPmTCxatAhvvPEGJkyYYNUCqWH1iPSFn4catwpLsfv8LTzQOkDukoiIqBGz+IzKhQsXMHz48CrbR4wYgdTUVKsURfJRKiQMqryjMod/iIhIbhYHlfDwcGzZsqXK9s2bNyM8nDe0cwbG4Z+kk+nQG4TM1RARUWNm8dDPX/7yF8ycORNHjhxB7969AQC7du3C8uXL8e9//9vqBVLD6xXlBy9XF2QWlGL/xSzcF+Und0lERNRIWRxUpk6diuDgYLz77rv45ptvAABt27bFqlWr8Mgjj1i9QGp4ahcF4toFYfWha9iQnM6gQkREsrE4qADAqFGjMGrUKGvXQnYkPjbEFFRee7gdFApJ7pKIiKgR4oJvVK2+rfzhoVYiPa8ER67myF0OERE1UrUKKr6+vsjMrFip1MfHB76+vnd9kHNwVSnRPzoQALCBV/8QEZFMajX0895770Gr1Zo+liQOAzQG8bEh+OlYGtYnp+Hl+Gh+34mIqMHVKqhMnDjR9PGkSZNsVQvZmQfbBEDjosCVrGKcuJ6H2KbecpdERESNjMVzVJRKJTIyMqpsv3XrFpRKpVWKIvvgoXFBv8qVaTn8Q0REcrA4qAhR/QJgOp0OarW63gWRfYlvb7xJYZrMlRARUWNU68uT33//fQCAJEn49NNP4enpaXpNr9djx44diI6Otn6FJKuHooOgUko4f7MQZ2/ko1WQVu6SiIioEal1UHnvvfcAVJxRWbp0qdkwj1qtRvPmzbF06VLrV0iy8nZToU9Lf/yachPrk9MZVIiIqEHVOqgYbzjYv39/rF69Gj4+PjYriuxLfGywKajMHNBK7nKIiKgRsXiOyrZt2xhSGpmB7YKhVEg4lZaHS7cK5S6HiIgakTotoX/16lX8+OOPuHz5MkpLS81eW7RokVUKI/vh66FGz0hf/H7+FtYnp+O5fi3kLomIiBoJi4PKli1bMGLECERFReH06dOIjY3FxYsXIYRAly5dbFEj2YH42GAGFSIianAWD/28/PLLmDNnDo4fPw5XV1d8//33uHLlCvr164fHHnvMFjWSHRgcEwxJAo5eycH1nGK5yyEiokbC4qBy6tQpPPXUUwAAFxcXFBcXw9PTE6+//jrefvttqxdI9iHQyxVdm1XMTeLib0RE1FAsDioeHh6meSkhISE4f/686TXjjQvJOQ2JrVj8jUGFiIgaisVB5b777sPOnTsBAEOHDsVf/vIXLFy4EFOmTMF9991n9QLJfhiDyv5LWcjIL5G5GiIiagwsDiqLFi1Cz549AQDz58/HgAEDsGrVKjRv3hyfffaZ1Qsk+xHm444OYd4QAth04obc5RARUSNg8VU/UVFRpo89PDy4Gm0jMyQ2GMeu5mJDcjqeuC9C7nKIiMjJWXxGxVb++c9/QpIkvPDCC3KXQvcQHxsCANh94RayC0tr2JuIiKh+LA4qCoUCSqXyro+62L9/Pz7++GN06NChTp9PDSfS3wPRwVroDQJJpzj8Q0REtmXx0M+aNWvMnpeVleHw4cP44osvMH/+fIsLKCgoQEJCAv773/9iwYIFFn8+NbwhscE4nZ6PDcnpGNstXO5yiIjIiVkcVB555JEq28aMGYOYmBisWrUKf/rTnyw63vTp0zFs2DDExcXVGFR0Oh10Op3peV5eHoCKsFRWVmbR+9bEeDxrH9
de1Ke/gdH+WLz5LH47exNZ+cXQutbpTgw25ezfP8D5e3T2/gDn75H9OT5b9WjJ8SQhhLDGm164cAEdOnRAQUFBrT9n5cqVWLhwIfbv3w9XV1c8+OCD6NSpExYvXlzt/vPmzav2rM2KFSvg7u5e19LJQkIAbx5RIqNEwpMt9egWYJUfISIiaiSKioowYcIE5ObmwsvL6577WuVX4eLiYrz//vto2rRprT/nypUrmDVrFpKSkuDq6lqrz3n55Zcxe/Zs0/O8vDyEh4dj0KBBNTZqqbKyMiQlJWHgwIFQqVRWPbY9qG9/p9VnsXRHKm6oQzB0aCfrF1hPzv79A5y/R2fvD3D+Htmf47NVj8YRkdqwOKj4+PhAkiTTcyEE8vPz4e7ujsTExFof5+DBg8jIyDC7kaFer8eOHTvw4YcfQqfTVZmcq9FooNFoqhxLpVLZ7IfElse2B3Xtb1iHpli6IxU7zmaiTEhwV9vf8A/g/N8/wPl7dPb+AOfvkf05Pmv3aMmxLP7f5b333jMLKgqFAgEBAejZsyd8fHxqfZwBAwbg+PHjZtsmT56M6Oho/O1vf6vzFUTUMGKbeiHMxw1Xs4uxPeUm4tuHyF0SERE5IYuDyqRJk6zyxlqtFrGxsWbbPDw84OfnV2U72R9JkjAkJhif7kzF+uR0BhUiIrKJWgWVY8eO1fqAXAul8YhvXxFUtp7OgK5cD40Lz4IREZF11SqodOrUCZIkoaYLhCRJgl6vr3Mxv/76a50/lxpe53AfBHlpcCNPh51nMzGgbZDcJRERkZOpVVBJTU21dR3kgBQKCYNjgvHl7ktYn5zOoEJERFZXq6ASEcGbz1H1hsRWBJWkkzdQpjdApbSb20cREZETqPM1pSdPnsTly5dRWmp+Y7oRI0bUuyhyHD2a+8LXQ42swlLsuXALfVsFyF0SERE5EYuDyoULFzBq1CgcP37cbN6K8ZLl+sxRIcfjolRgULsgrNx/BeuT0xlUiIjIqiw+Tz9r1ixERkYiIyMD7u7uOHHiBHbs2IFu3bpxMmwjNSQ2GACw6UQ69AYup09ERNZjcVDZvXs3Xn/9dfj7+0OhUEChUOD+++/HW2+9hZkzZ9qiRrJzvVv4Q+vqgsyCUhy4mCV3OURE5EQsDip6vR5arRYA4O/vj+vXrwOomHCbkpJi3erIIahdFBhYecXP+uR0mashIiJnYnFQiY2NxdGjRwEAPXv2xL/+9S/s2rULr7/+OqKioqxeIDkG4/DPxhPpMHD4h4iIrMTioPKPf/wDBoMBAPD6668jNTUVffv2xS+//IL333/f6gWSY3igdQDc1Uqk5Zbg6NUcucshIiInYfFVP4MHDzZ93LJlS5w+fRpZWVlV7qpMjYurSon+0YH4+VgaNiSno3Oz2t+gkoiI6G4sPqOSmJiIwsJCs22+vr4MKYT4yuGf9cnpNd5ugYiIqDYsDiovvvgigoKCMGHCBPzyyy9cN4VM+rcJhMZFgctZRTiZlid3OURE5AQsDippaWlYuXIlJEnC2LFjERISgunTp+P333+3RX3kQDw0LnigdcWCbxt49Q8REVmBxUHFxcUFDz/8ML766itkZGTgvffew8WLF9G/f3+0aNHCFjWSA7l9+IeIiKi+6nyvHwBwd3fH4MGDkZ2djUuXLuHUqVPWqosc1IC2QVApJZzLKMC5jHy0DNTKXRIRETmwOt3qtqioCF999RWGDh2Kpk2bYvHixRg1ahROnDhh7frIwXi7qdC7hT8AYP1xnlUhIqL6sTioPP744wgMDMSLL76IqKgo/Prrrzh37hzeeOMNREdH26JGcjAc/iEiImuxeOhHqVTim2++weDBg6FUKm1REzm4ge2C8Mqa4ziZlofLt4rQzM9d7pKIiMhBWXxGxTjkw5BCd+PnqUHPSD8AwPrkNJmrISIiR1broDJ06FDk5uaanv/zn/9ETk6O6fmtW7fQrl07qxZHjiu+PYd/iIio/modVDZu3AidTmd6/u
abbyIrK8v0vLy8nHdPJpPBMRVB5ciVHKTlFstcDREROapaB5U7l0TnEul0L0FerugaUXG/n408q0JERHVUp8uTiWqDV/8QEVF91TqoSJJU5caDvBEh3Ytx+Gf/xSxkFuhq2JuIiKiqWl+eLITApEmToNFoAAAlJSV47rnn4OHhAQBm81eIACDc1x3tm3rj+LVcbDpxAxN6NpO7JCIicjC1DioTJ040e/7EE09U2eepp56qf0XkVIbEBuP4tVysT05jUCEiIovVOqgsW7bMlnWQk4qPDcY7G1Ow+/wt5BaVwdtdJXdJRETkQDiZlmwqKsATbYK0KDcIJJ26IXc5RETkYBhUyOaGVF79s4Gr1BIRkYUYVMjmjKvU7jibiQJduczVEBGRI2FQIZtrE6RFpL8HSssN2Ho6Q+5yiIjIgTCokM1JksThHyIiqhMGFWoQxlVqt52+ieJSvczVEBGRo2BQoQbRvqk3mjZxQ3GZHtvP3JS7HCIichAMKtQgOPxDRER1waBCDcY4/LPlVAZ05Rz+ISKimjGoUIPp0swHgVoN8nXl+P3cLbnLISIiB8CgQg1GoZBMd1Rez+EfIiKqBQYValDG4Z+kkzdQrjfIXA0REdk7BhVqUD0ifeHjrkJ2URn2pmbJXQ4REdk5BhVqUC5KBQa14/APERHVDoMKNbghlff+2XjiBgwGIXM1RERkzxhUqMH1aeEPrasLbubrcPByttzlEBGRHWNQoQandlEgrm0QAGD98XSZqyEiInvGoEKyMK5Su/FEOoTg8A8REVWPQYVk0a91ANzVSlzLKcaxq7lyl0NERHaKQYVk4apSon+bQADA+mQO/xARUfUYVEg2t9+kkMM/RERUHQYVkk3/6ECoXRS4eKsIp9Pz5S6HiIjsEIMKycZT44IHWgUA4PAPERFVj0GFZBV/2/APERHRnRhUSFZxbYPgopBw5kYBzt8skLscIiKyMwwqJCtvdxV6t/QHAGzg8A8REd2BQYVkZxz+4U0KiYjoTgwqJLtB7YKgkIDka3m4klUkdzlERGRHGFRIdn6eGvSI9AXA4R8iIjLHoEJ2IT42BACHf4iIyByDCtmFwTEV81QOXc5Bem6JzNUQEZG9YFAhuxDs7YouzZoAqLijMhEREcCgQnaEwz9ERHQnBhWyG8abFO5LzcKtAp3M1RARkT1gUCG7Ee7rjtimXjAIYNPJG3KXQ0REdkDWoPLWW2+he/fu0Gq1CAwMxMiRI5GSkiJnSSSzP4Z/OE+FiIhkDirbt2/H9OnTsWfPHiQlJaGsrAyDBg1CYWGhnGWRjIzDP7+fy0RuUZnM1RARkdxc5HzzDRs2mD1fvnw5AgMDcfDgQTzwwAMyVUVyahHgidZBnjhzowCbT93A6K5hcpdEREQykjWo3Ck3NxcA4OvrW+3rOp0OOt0fkyzz8vIAAGVlZSgrs+5v38bjWfu49sKe+xvUNhBnbhTgl+PXMaJDUJ2OYc/9WYuz9+js/QHO3yP7c3y26tGS40lCCGHVd68jg8GAESNGICcnBzt37qx2n3nz5mH+/PlVtq9YsQLu7u62LpEayLVC4F/HXOAiCSzsroerUu6KiIjImoqKijBhwgTk5ubCy8vrnvvaTVCZOnUq1q9fj507dyIsrPrT/dWdUQkPD0dmZmaNjVqqrKwMSUlJGDhwIFQqlVWPbQ/suT8hBAYu3oVLWUVYPLYDhrUPtvgY9tyftTh7j87eH+D8PbI/x2erHvPy8uDv71+roGIXQz8zZszATz/9hB07dtw1pACARqOBRqOpsl2lUtnsh8SWx7YH9tpffPsQLN1+HkmnbmJkl/A6H8de+7MmZ+/R2fsDnL9H9uf4rN2jJceS9aofIQRmzJiBNWvWYOvWrYiMjJSzHLIj8ZVX/2xLyUBJmV7maoiISC6yBpXp06cjMTERK1asgFarRXp6OtLT01FcXCxnWWQHOoR5o2kTNxSV6rH9zE25yyEiIpnIGlSWLFmC3NxcPP
jggwgJCTE9Vq1aJWdZZAckSTLdUXkDF38jImq0ZJ2jYifzeMlOxbcPxue7UrH51A2UlhugduEdH4iIGhv+y092q2szHwRoNcgvKceu85lyl0NERDJgUCG7pVBIGBxTseDbhuMc/iEiaowYVMiuGW9SuOlkOsr1BpmrISKihsagQnatZ6QvfNxVyC4qw77ULLnLISKiBsagQnbNRanAwHYVwz/refUPEVGjw6BCds84/LPxRDoMBl4pRkTUmDCokN3r3dIPWo0LMvJ1OHQ5W+5yiIioATGokN3TuCgxoG0gAA7/EBE1Ngwq5BCGVA7/bEhO50KBRESNCIMKOYR+rQPgplLiWk4xjl/LlbscIiJqIAwq5BDc1Er0jw4AwOEfIqLGhEGFHAaHf4iIGh8GFXIYD0UHQu2iQGpmIVJu5MtdDhERNQAGFXIYnhoXPNDKHwCwnvf+ISJqFBhUyKHcPvxDRETOj0GFHMrAtkFwUUhIuZGPCzcL5C6HiIhsjEGFHIq3uwq9WvgB4NU/RESNAYMKOZx4Dv8QETUaDCrkcAbFBEEhAcev5eJKVpHc5RARkQ0xqJDD8ffUoHtzXwAVd1QmIiLnxaBCDik+NhgA56kQETk7BhVySMbLlA9eysaNvBKZqyEiIlthUCGHFOztis7NmgDg8A8RkTNjUCGHZRr+4Sq1REROi0GFHJbxMuW9qbdwq0AnczVERGQLDCrksMJ93RET6gWDAJJO3pC7HCIisgEGFXJovPqHiMi5MaiQQzNe/fP7+UzkFpfJXA0REVkbgwo5tJaBnmgV6IkyvcCWUxz+ISJyNgwq5PA4/ENE5LwYVMjhGYd/dpy5iUJduczVEBGRNTGokMNrG6JFhJ87dOUGbEvJkLscIiKyIgYVcniSJGEIh3+IiJwSgwo5BePib9tOZ6CkTC9zNUREZC0MKuQUOoZ5I9TbFUWleuw4c1PucoiIyEoYVMgpSJKEwZXDPxs4/ENE5DQYVMhpGId/kk7dQGm5QeZqiIjIGhhUyGl0jfCBv6cG+SXl2JOaJXc5RERkBQwq5DSUCgmDY4IAABtPcJVaIiJnwKBCTsU4/PNLcjr235SwNzULeoOQuSoiIqorF7kLILKmnOJSSBJQoNMj8ZwSiecOIMTbFXOHtzOtYEtERI6DZ1TIaWxITsPzKw5D3HECJT23BFMTD2FDcpo8hRERUZ0xqJBT0BsE5q87ieoGeYzb5q87yWEgIiIHw6BCTmFfahbSckvu+roAkJZbgn28GoiIyKFwjgo5hYz8u4eU281bdwIPtg5A2xAvtA3xQlSAB1RK5nUiInvFoEJOIVDrWqv9UtLzkZKeb3quVirQKsjTFFzahmjRLsQLTdzVtiqViIgswKBCTqFHpC9CvF2RnltS7TwVCYCfpxovDmyNlPR8nErLw+m0fOTrynHieh5OXM8z2z/E29UUXIwhprmfB5QKqUH6ISKiCgwq5BSUCglzh7fD1MRDkACzsGKMFgtGxppdoiyEwNXsYpxMy8Mp0yMfl7OKkJZbgrTcEmw9nWHa31WlQJtgL7S7LbxEB2uhdVU1SI9ERI0Rgwo5jSGxIVjyRBfMX3fSbGJt8F3WUZEkCeG+7gj3dcfgmGDT9vySMtNZl5NpFX+mpOejuEyPo1dycPRKjtlxwn3dEB1cEVyMISbcxx0Knn0hIqo3BhVyKkNiQzCwXTB2n8vApt/2YlDfnujVMtCiIRutqwrdmvuiW3Nf0za9QeDSrUKcqgwuxsf13BJcySrGlaxiJJ38Y9l+T40L2gRrzYaOooO1cFfzrxwRkSX4ryY5HaVCQs9IX9w6JdAz0tcq80qUCglRAZ6ICvDEsA5/nJnJKSo1Dy/peThzowAFunIcvJSNg5eyTftKEtDcz6MivFSegWkb6oVQb1dIkmU16g0Ce1OzcDBTgl9qlsVhjIjIUTCoENVDE3c1erXwQ68WfqZt5XoDLmQWVg4d5ZmCzM18HVIzC5GaWYhfjq
eb9vd2UyE6WFs5dFQRYFoFecJVpaz2PTckp902vKXEl2d5mwAicl4MKkRW5qJUoHWQFq2DtHikU1PT9swCndmk3VNpeTiXUYDc4jLsTc3C3tsWo1MqJET5e1S5bPrgpWxM++pQlSubjLcJWPJEF4YVInIqDCpEDcTfU4O+rQLQt1WAaZuuXI9zGQU4bRw+Sq8IMVmFpTibUYCzGQX48eh10/4KCXe9TYCEitsEDGwXzGEgInIaDCpEMtK4KBET6o2YUG/TNiEEMvJ1t102XRFizmcU4F63KjLeJmDQou1o7u8Bf08N/LVq+Hlo4K/VwN9TjQBPDfw8NWjipuJVSUTkEBhUiOyMJEkI8nJFkJcr+rcJNG3/7uAVzPn2WI2ffz6zEOczC++5j4tCgq+HujLMaODvoTaFGX9PzW0PNXw91HBp4NsMcLIwERkxqBA5iKZN3Gu135xBreHnqUFmvg6ZBTpkFpRW/lnxcW5xGcoNFWdtMvJ1QNq9jydJgI+72hRi/Dz/CDQVZ2huCzyeamhcqp8EXFucLExEt2NQIXIQtblNQLC3K6Y+2PKeZx9Kyw24VahDZn4pMgt1lYHmjzBz67aPswpLYRBAVmEpsgpLceZGQY11al1dTGdjbj87Yww0Ado/tntozP8J2pCchqmJnCxMRH9gUCFyELW5TcDc4e1qHCJRuygQ4u2GEG+3Gt9TbxDIKiytEmBuFlQEnVuFlWdqKj8u0wvkl5Qjv6QcqTUMPwGAm0ppCjB+Hmr8fuEWJwsTkRkGFSIHYultAupLqZAQoNUgQKupcV8hBHKLy8yHm8zO1tw+BKVDSZkBxWV6XM0uxtXs4pqPj4rJwvH/3oEof09TXYGVfxof/p4aqBp4Tg0R2Q6DCpGDscZtAmxBkiQ0cVejibsaLQPvva8QAoWletwynqHJL8WW0zfw7YGrNb7PmRsFNQ5B+XpUXOFUbZjx1CDQS4MAT1d4ublYvCqwtTj7hGFn748aDoMKkQOyxW0CGpIkSfDUuMBT44IIPw8AFSv01iaovBjXCk3c1biZr6t4FOiQkV+Cm5Vnb4zDVVmFpUi5kX/PY6mVCrOzMeZBxnx7fScJ387ZJww7e3/UsBhUiMgu1Hay8IyHWt01mBkMAtlFpRXhJe+PIGMMNcZAczNfh7yScpTqDbiWU4xrOTUPPXm7qUxBprohp0CtKwK0Na9R4+wThp29P6PGcMbIXnq0i6Dyn//8B++88w7S09PRsWNHfPDBB+jRo4fcZRFRA7LGZGGFQoJf5SXU0cH3fr+SMn2VIHOz8pJt4/bMyo9L9QbkFpcht7gM5zLuPezkopDgX81ZmUBtxYThV3844bQThvUGgfnrTjptf0aN4YyRPfUoe1BZtWoVZs+ejaVLl6Jnz55YvHgxBg8ejJSUFAQG1jDQTUROpSEnC7uqlAj3dUe4773XpzFOEjYbasqrLuCUILuoYo2a9LwSpOeV3PO41b4XKiYMd30jCRqVAgpJgoSKoTKFApAgQSFVPJekiv/4FZJUsZ9xOwCFAmafK0l/PP9j3z8+VlTO06k4VuX7SQAq389sP7NjmW+7mV9i9n27W39zf0xGq0At1C4KqJUKqCr/VLtIUCuVUCklqF0UUCkV0FT+qXZRVNkmR9hpDGeM7K1H2YPKokWL8Mwzz2Dy5MkAgKVLl+Lnn3/G559/jpdeeknm6oioodnbZOHbJwm3CtLec1/jGjU386sPM6fT83DxVlGN75lTXAbUPBrlsBL3XLbKcZQKqSLUGIOMWei5LeCYnktQu1QEIY1x/7vu+8fnaCpDklKS8Mqa5LueMQKAV384gZaBWrOf15p+cmuazy3VeISaj1Hbz9cbhN2d9ZM1qJSWluLgwYN4+eWXTdsUCgXi4uKwe/fuKvvrdDrodDrT87y8PABAWVkZysrKrFqb8XjWPq69YH+Oz9l77BKmxS1/gS5hWhj05TDo5a6oZhIAf3cX+Lu7oG2QR5XX96Zm4Y
nPD9R4nIWPtENMqBeEAAQEDKLizI4QgEEICFT+KWC2j6HiyW37AMJgvr/p88yOUfkepvequg3ij/eofGrax3icy7cK8c2h6/dqDQDQK8oH3m5qlJYbUKY3oFRvQJleoLTcUO2225/fTm8Q0BsESsoMtfjuNIyb+TrELdoudxk2YzwrtvtcBnpG+tb5OJb8uyVrUMnMzIRer0dQUJDZ9qCgIJw+fbrK/m+99Rbmz59fZfumTZvg7l675cUtlZSUZJPj2gv25/icvUdn6s8ggCZqJXJKgep/zxZoogbcbxzDpQzb1iIBsN51TBWaqoFNtehvbOBN1OWXcSEAvQDKBVBuqHiYPReA3gCUC6nyz+r3Nb1mkO54fufrxs+RUG4A8suAnNKaC1dJArcv5XOPe4ne+eWxxi4WufN4BgHoRc09bvptL26dqns1RUU1n1k0kn3oxxIvv/wyZs+ebXqel5eH8PBwDBo0CF5eXlZ9r7KyMiQlJWHgwIFQqVRWPbY9YH+Oz9l7dNb+VM1v4PmVRwFUN2FYwoJHO2JwTFA1n+kYnLm/2p4RWza5e73ONsiptj0O6tuzXj0aR0RqQ9ag4u/vD6VSiRs3bphtv3HjBoKDq07Z12g00GiqrpCpUqls9g+ZLY9tD9if43P2Hp2tv4c7hcHFRdlgqws3NGfur1fLwFpdQu/Ilyo3VI+W/J2WNaio1Wp07doVW7ZswciRIwEABoMBW7ZswYwZM+QsjYjIZuxtwrC1OWt/1rrflj2zxx5lvyHG7Nmz8d///hdffPEFTp06halTp6KwsNB0FRARkTMyri7c1d8xVxeuibP2Z7yEPtjb1Wx7sLerU1yaDNhfj7LPURk3bhxu3ryJ1157Denp6ejUqRM2bNhQZYItERGRPXDWM0a3s6ceZQ8qADBjxgwO9RARkcNw9Ptt1Ya99Cj70A8RERHR3TCoEBERkd1iUCEiIiK7xaBCREREdotBhYiIiOwWgwoRERHZLQYVIiIislsMKkRERGS3GFSIiIjIbtnFyrR1JUTF7ZIsuV10bZWVlaGoqAh5eXlOdedWI/bn+Jy9R2fvD3D+Htmf47NVj8b/t43/j9+LQweV/Px8AEB4eLjMlRAREZGl8vPz4e3tfc99JFGbOGOnDAYDrl+/Dq1WC0my7j0I8vLyEB4ejitXrsDLy8uqx7YH7M/xOXuPzt4f4Pw9sj/HZ6sehRDIz89HaGgoFIp7z0Jx6DMqCoUCYWFhNn0PLy8vp/0BBNifM3D2Hp29P8D5e2R/js8WPdZ0JsWIk2mJiIjIbjGoEBERkd1iULkLjUaDuXPnQqPRyF2KTbA/x+fsPTp7f4Dz98j+HJ899OjQk2mJiIjIufGMChEREdktBhUiIiKyWwwqREREZLcYVIiIiMhuMajcYceOHRg+fDhCQ0MhSRLWrl0rd0lW9dZbb6F79+7QarUIDAzEyJEjkZKSIndZVrNkyRJ06NDBtDhRr169sH79ernLspl//vOfkCQJL7zwgtylWM28efMgSZLZIzo6Wu6yrOratWt44okn4OfnBzc3N7Rv3x4HDhyQuyyrad68eZXvoSRJmD59utylWYVer8err76KyMhIuLm5oUWLFnjjjTdqdd8aR5Gfn48XXngBERERcHNzQ+/evbF//35ZanHolWltobCwEB07dsSUKVPw6KOPyl2O1W3fvh3Tp09H9+7dUV5ejldeeQWDBg3CyZMn4eHhIXd59RYWFoZ//vOfaNWqFYQQ+OKLL/DII4/g8OHDiImJkbs8q9q/fz8+/vhjdOjQQe5SrC4mJgabN282PXdxcZ5/qrKzs9GnTx/0798f69evR0BAAM6ePQsfHx+5S7Oa/fv3Q6/Xm54nJydj4MCBeOyxx2SsynrefvttLFmyBF988QViYmJw4MABTJ48Gd7e3pg5c6bc5VnF008/jeTkZPzvf/9DaGgoEhMTERcXh5MnT6Jp06YNW4yguwIg1qxZI3cZNpWRkS
EAiO3bt8tdis34+PiITz/9VO4yrCo/P1+0atVKJCUliX79+olZs2bJXZLVzJ07V3Ts2FHuMmzmb3/7m7j//vvlLqNBzZo1S7Ro0UIYDAa5S7GKYcOGiSlTpphte/TRR0VCQoJMFVlXUVGRUCqV4qeffjLb3qVLF/H3v/+9wevh0E8jl5ubCwDw9fWVuRLr0+v1WLlyJQoLC9GrVy+5y7Gq6dOnY9iwYYiLi5O7FJs4e/YsQkNDERUVhYSEBFy+fFnukqzmxx9/RLdu3fDYY48hMDAQnTt3xn//+1+5y7KZ0tJSJCYmYsqUKVa/eaxcevfujS1btuDMmTMAgKNHj2Lnzp2Ij4+XuTLrKC8vh16vh6urq9l2Nzc37Ny5s8HrcZ7zqWQxg8GAF154AX369EFsbKzc5VjN8ePH0atXL5SUlMDT0xNr1qxBu3bt5C7LalauXIlDhw7JNl5saz179sTy5cvRpk0bpKWlYf78+ejbty+Sk5Oh1WrlLq/eLly4gCVLlmD27Nl45ZVXsH//fsycORNqtRoTJ06UuzyrW7t2LXJycjBp0iS5S7Gal156CXl5eYiOjoZSqYRer8fChQuRkJAgd2lWodVq0atXL7zxxhto27YtgoKC8PXXX2P37t1o2bJlwxfU4OdwHAicfOjnueeeExEREeLKlStyl2JVOp1OnD17Vhw4cEC89NJLwt/fX5w4cULusqzi8uXLIjAwUBw9etS0zdmGfu6UnZ0tvLy8nGb4TqVSiV69eplte/7558V9990nU0W2NWjQIPHwww/LXYZVff311yIsLEx8/fXX4tixY+LLL78Uvr6+Yvny5XKXZjXnzp0TDzzwgAAglEql6N69u0hISBDR0dENXguDyj04c1CZPn26CAsLExcuXJC7FJsbMGCA+POf/yx3GVaxZs0a0z8cxgcAIUmSUCqVory8XO4SbaJbt27ipZdekrsMq2jWrJn405/+ZLbto48+EqGhoTJVZDsXL14UCoVCrF27Vu5SrCosLEx8+OGHZtveeOMN0aZNG5kqsp2CggJx/fp1IYQQY8eOFUOHDm3wGjhHpZERQmDGjBlYs2YNtm7disjISLlLsjmDwQCdTid3GVYxYMAAHD9+HEeOHDE9unXrhoSEBBw5cgRKpVLuEq2uoKAA58+fR0hIiNylWEWfPn2qLAlw5swZREREyFSR7SxbtgyBgYEYNmyY3KVYVVFRERQK8/8+lUolDAaDTBXZjoeHB0JCQpCdnY2NGzfikUceafAaOEflDgUFBTh37pzpeWpqKo4cOQJfX180a9ZMxsqsY/r06VixYgV++OEHaLVapKenAwC8vb3h5uYmc3X19/LLLyM+Ph7NmjVDfn4+VqxYgV9//RUbN26UuzSr0Gq1VeYTeXh4wM/Pz2nmGc2ZMwfDhw9HREQErl+/jrlz50KpVGL8+PFyl2YVL774Inr37o0333wTY8eOxb59+/DJJ5/gk08+kbs0qzIYDFi2bBkmTpzoVJeXA8Dw4cOxcOFCNGvWDDExMTh8+DAWLVqEKVOmyF2a1WzcuBFCCLRp0wbnzp3DX//6V0RHR2Py5MkNX0yDn8Oxc9u2bRMAqjwmTpwod2lWUV1vAMSyZcvkLs0qpkyZIiIiIoRarRYBAQFiwIABYtOmTXKXZVPONkdl3LhxIiQkRKjVatG0aVMxbtw4ce7cObnLsqp169aJ2NhYodFoRHR0tPjkk0/kLsnqNm7cKACIlJQUuUuxury8PDFr1izRrFkz4erqKqKiosTf//53odPp5C7NalatWiWioqKEWq0WwcHBYvr06SInJ0eWWiQhnGgpPSIiInIqnKNCREREdotBhYiIiOwWgwoRERHZLQYVIiIislsMKkRERGS3GFSIiIjIbjGoEBERkd1iUCEihydJEtauXSt3GURkAwwqRFQvkyZNgiRJVR5DhgyRuzQicgLOdQMGIpLFkCFDsGzZMrNtGo1GpmqIyJnwjAoR1ZtGo0
FwcLDZw8fHB0DFsMySJUsQHx8PNzc3REVF4bvvvjP7/OPHj+Ohhx6Cm5sb/Pz88Oc//xkFBQVm+3z++eeIiYmBRqNBSEgIZsyYYfZ6ZmYmRo0aBXd3d7Rq1Qo//vij6bXs7GwkJCQgICAAbm5uaNWqVZVgRUT2iUGFiGzu1VdfxejRo3H06FEkJCTg8ccfx6lTpwAAhYWFGDx4MHx8fLB//358++232Lx5s1kQWbJkCaZPn44///nPOH78OH788Ue0bNnS7D3mz5+PsWPH4tixYxg6dCgSEhKQlZVlev+TJ09i/fr1OHXqFJYsWQJ/f/+G+wIQUd3JcitEInIaEydOFEqlUnh4eJg9Fi5cKISouGP3c889Z/Y5PXv2FFOnThVCCPHJJ58IHx8fUVBQYHr9559/FgqFQqSnpwshhAgNDRV///vf71oDAPGPf/zD9LygoEAAEOvXrxdCCDF8+HAxefJk6zRMRA2Kc1SIqN769++PJUuWmG3z9fU1fdyrVy+z13r16oUjR44AAE6dOoWOHTvCw8PD9HqfPn1gMBiQkpICSZJw/fp1DBgw4J41dOjQwfSxh4cHvLy8kJGRAQCYOnUqRo8ejUOHDmHQoEEYOXIkevfuXadeiahhMagQUb15eHhUGYqxFjc3t1rtp1KpzJ5LkgSDwQAAiI+Px6VLl/DLL78gKSkJAwYMwPTp0/F///d/Vq+XiKyLc1SIyOb27NlT5Xnbtm0BAG3btsXRo0dRWFhoen3Xrl1QKBRo06YNtFotmjdvji1bttSrhoCAAEycOBGJiYlYvHgxPvnkk3odj4gaBs+oEFG96XQ6pKenm21zcXExTVj99ttv0a1bN9x///346quvsG/fPnz22WcAgISEBMydOxcTJ07EvHnzcPPmTTz//PN48sknERQUBACYN28ennvuOQQGBiI+Ph75+fnYtWsXnn/++VrV99prr6Fr166IiYmBTqfDTz/9ZApKRGTfGFSIqN42bNiAkJAQs21t2rTB6dOnAVRckbNy5UpMmzYNISEh+Prrr9GuXTsAgLu7OzZu3IhZs2ahe/fucHd3x+jRo7Fo0SLTsSZOnIiSkhK89957mDNnDvz9/TFmzJha16dWq/Hyyy/j4sWLcHNzQ9++fbFy5UordE5EtiYJIYTcRRCR85IkCWvWrMHIkSPlLoWIHBDnqBAREZHdYlAhIiIiu8U5KkRkUxxdJqL64BkVIiIislsMKkRERGS3GFSIiIjIbjGoEBERkd1iUCEiIiK7xaBCREREdotBhYiIiOwWgwoRERHZLQYVIiIislv/D/KypAveE6obAAAAAElFTkSuQmCC",
|
| 520 |
+
"text/plain": [
|
| 521 |
+
"<Figure size 640x480 with 1 Axes>"
|
| 522 |
+
]
|
| 523 |
+
},
|
| 524 |
+
"metadata": {},
|
| 525 |
+
"output_type": "display_data"
|
| 526 |
+
}
|
| 527 |
+
],
|
| 528 |
+
"source": [
|
| 529 |
+
"import matplotlib.pyplot as plt\n",
|
| 530 |
+
"\n",
|
| 531 |
+
"# Given numbers\n",
|
| 532 |
+
"numbers = [\n",
|
| 533 |
+
" 9.069988300325349e-05,\n",
|
| 534 |
+
" 7.004399230936542e-05,\n",
|
| 535 |
+
" 9.137028973782435e-05,\n",
|
| 536 |
+
" 5.340397547115572e-05,\n",
|
| 537 |
+
" 5.0301870942348614e-05,\n",
|
| 538 |
+
" 9.043936188390944e-06,\n",
|
| 539 |
+
" 4.6886875679774676e-06,\n",
|
| 540 |
+
" 4.490133960644016e-06,\n",
|
| 541 |
+
" 6.136821866675746e-06,\n",
|
| 542 |
+
" 3.3243470625166083e-06,\n",
|
| 543 |
+
" 2.348009729757905e-06,\n",
|
| 544 |
+
" 2.1804094103572425e-06,\n",
|
| 545 |
+
" 1.958705070137512e-06,\n",
|
| 546 |
+
" 6.988730092416517e-07,\n",
|
| 547 |
+
" 5.00343162457284e-07,\n",
|
| 548 |
+
" 4.1343139400851214e-07,\n",
|
| 549 |
+
" 5.06081335061026e-07,\n",
|
| 550 |
+
" 7.039822662591178e-07,\n",
|
| 551 |
+
" 5.087575800644117e-07,\n",
|
| 552 |
+
" 2.0332389993882316e-08,\n",
|
| 553 |
+
" 1.718821529550496e-08,\n",
|
| 554 |
+
" 1.5028433608677005e-08,\n",
|
| 555 |
+
" 3.9828059072988253e-08,\n",
|
| 556 |
+
" 2.8266715190738978e-08,\n",
|
| 557 |
+
" 2.1497044144780375e-08,\n",
|
| 558 |
+
" 9.854548288501519e-09\n",
|
| 559 |
+
"]\n",
|
| 560 |
+
"\n",
|
| 561 |
+
"# Sorting the numbers in descending order\n",
|
| 562 |
+
"numbers.sort(reverse=True)\n",
|
| 563 |
+
"\n",
|
| 564 |
+
"# Selecting every 3rd number for plotting\n",
|
| 565 |
+
"selected_numbers = numbers[::3]\n",
|
| 566 |
+
"\n",
|
| 567 |
+
"# Creating x-axis values (epochs)\n",
|
| 568 |
+
"epochs = list(range(1, len(selected_numbers) + 1))\n",
|
| 569 |
+
"\n",
|
| 570 |
+
"# Plotting the curve\n",
|
| 571 |
+
"plt.plot(epochs, selected_numbers, marker='o', linestyle='-')\n",
|
| 572 |
+
"plt.xlabel('Epochs')\n",
|
| 573 |
+
"plt.ylabel('Evaluation Loss')\n",
|
| 574 |
+
"plt.title('Evaluation Loss vs Epochs')\n",
|
| 575 |
+
"plt.grid(True)\n",
|
| 576 |
+
"plt.show()\n"
|
| 577 |
+
]
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"cell_type": "code",
|
| 581 |
+
"execution_count": 5,
|
| 582 |
+
"metadata": {},
|
| 583 |
+
"outputs": [
|
| 584 |
+
{
|
| 585 |
+
"name": "stdout",
|
| 586 |
+
"output_type": "stream",
|
| 587 |
+
"text": [
|
| 588 |
+
"Running on local URL: http://127.0.0.1:7862\n",
|
| 589 |
+
"\n",
|
| 590 |
+
"To create a public link, set `share=True` in `launch()`.\n"
|
| 591 |
+
]
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"data": {
|
| 595 |
+
"text/html": [
|
| 596 |
+
"<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
| 597 |
+
],
|
| 598 |
+
"text/plain": [
|
| 599 |
+
"<IPython.core.display.HTML object>"
|
| 600 |
+
]
|
| 601 |
+
},
|
| 602 |
+
"metadata": {},
|
| 603 |
+
"output_type": "display_data"
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"data": {
|
| 607 |
+
"text/plain": []
|
| 608 |
+
},
|
| 609 |
+
"execution_count": 5,
|
| 610 |
+
"metadata": {},
|
| 611 |
+
"output_type": "execute_result"
|
| 612 |
+
}
|
| 613 |
+
],
|
| 614 |
+
"source": [
|
| 615 |
+
"import gradio as gr\n",
|
| 616 |
+
"import time\n",
|
| 617 |
+
"def my_function(x, progress=gr.Progress()):\n",
|
| 618 |
+
" progress(0, desc=\"Starting...\")\n",
|
| 619 |
+
" time.sleep(1)\n",
|
| 620 |
+
" for i in progress.tqdm(range(100)):\n",
|
| 621 |
+
" time.sleep(0.1)\n",
|
| 622 |
+
" return x\n",
|
| 623 |
+
"with gr.Blocks() as demo:\n",
|
| 624 |
+
" a=gr.Textbox()\n",
|
| 625 |
+
" b=gr.TextArea(\"tt\")\n",
|
| 626 |
+
" btn=gr.Button(\"Test\")\n",
|
| 627 |
+
" btn.click(my_function,a,b)\n",
|
| 628 |
+
"demo.launch()"
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
{
|
| 632 |
+
"cell_type": "code",
|
| 633 |
+
"execution_count": null,
|
| 634 |
+
"metadata": {},
|
| 635 |
+
"outputs": [],
|
| 636 |
+
"source": [
|
| 637 |
+
"if self.quantization == '8':\n",
|
| 638 |
+
" bnb_config = BitsAndBytesConfig( \n",
|
| 639 |
+
" load_in_8bit= True,\n",
|
| 640 |
+
" )\n",
|
| 641 |
+
"elif self.quantization == '4':\n",
|
| 642 |
+
" bnb_config = BitsAndBytesConfig(\n",
|
| 643 |
+
" load_in_4bit= True,\n",
|
| 644 |
+
" bnb_4bit_use_double_quant=True,\n",
|
| 645 |
+
" bnb_4bit_quant_type=\"nf4\", \n",
|
| 646 |
+
" bnb_4bit_compute_dtype=torch.bfloat16\n",
|
| 647 |
+
" )\n",
|
| 648 |
+
"model = AutoModelForCausalLM.from_pretrained(\n",
|
| 649 |
+
" base_model,\n",
|
| 650 |
+
" quantization_config=bnb_config,\n",
|
| 651 |
+
" torch_dtype=torch.bfloat16,\n",
|
| 652 |
+
" device_map=\"auto\",\n",
|
| 653 |
+
" trust_remote_code=True,\n",
|
| 654 |
+
" )"
|
| 655 |
+
]
|
| 656 |
+
},
|
| 657 |
+
{
|
| 658 |
+
"cell_type": "code",
|
| 659 |
+
"execution_count": null,
|
| 660 |
+
"metadata": {},
|
| 661 |
+
"outputs": [],
|
| 662 |
+
"source": [
|
| 663 |
+
"config = LoraConfig(\n",
|
| 664 |
+
" r= lora_r if lora_r else 16,\n",
|
| 665 |
+
" lora_alpha= lora_alpha if lora_alpha else 32,\n",
|
| 666 |
+
" target_modules=[\"q_proj\", \"v_proj\",\"k_proj\",\"o_proj\",\"gate_proj\",\"up_proj\",\"down_proj\"], \n",
|
| 667 |
+
" lora_dropout= lora_dropout if lora_dropout else 0.05,\n",
|
| 668 |
+
" bias=\"none\",\n",
|
| 669 |
+
" task_type=\"CAUSAL_LM\")\n",
|
| 670 |
+
"\n",
|
| 671 |
+
"training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,\n",
|
| 672 |
+
" gradient_accumulation_steps=GRAD_ACC,\n",
|
| 673 |
+
" optim=OPTIMIZER,\n",
|
| 674 |
+
" learning_rate=LR,\n",
|
| 675 |
+
" fp16=True, \n",
|
| 676 |
+
" logging_steps=10,\n",
|
| 677 |
+
" num_train_epochs = epoch if epoch else 2,\n",
|
| 678 |
+
" output_dir=lora_output,\n",
|
| 679 |
+
" remove_unused_columns=True,\n",
|
| 680 |
+
" )"
|
| 681 |
+
]
|
| 682 |
+
},
|
| 683 |
+
{
|
| 684 |
+
"cell_type": "code",
|
| 685 |
+
"execution_count": null,
|
| 686 |
+
"metadata": {},
|
| 687 |
+
"outputs": [],
|
| 688 |
+
"source": []
|
| 689 |
+
}
|
| 690 |
+
],
|
| 691 |
+
"metadata": {
|
| 692 |
+
"kernelspec": {
|
| 693 |
+
"display_name": "lang",
|
| 694 |
+
"language": "python",
|
| 695 |
+
"name": "python3"
|
| 696 |
+
},
|
| 697 |
+
"language_info": {
|
| 698 |
+
"codemirror_mode": {
|
| 699 |
+
"name": "ipython",
|
| 700 |
+
"version": 3
|
| 701 |
+
},
|
| 702 |
+
"file_extension": ".py",
|
| 703 |
+
"mimetype": "text/x-python",
|
| 704 |
+
"name": "python",
|
| 705 |
+
"nbconvert_exporter": "python",
|
| 706 |
+
"pygments_lexer": "ipython3",
|
| 707 |
+
"version": "3.10.13"
|
| 708 |
+
}
|
| 709 |
+
},
|
| 710 |
+
"nbformat": 4,
|
| 711 |
+
"nbformat_minor": 2
|
| 712 |
+
}
|
utils.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import datetime
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import os
|
| 5 |
+
# Shows the demo data format in the fine-tuning tab
|
| 6 |
+
def move_to(move, model_ans):
    """Return ID/question/answer labels for row *move* of a model-answer sheet.

    Args:
        move: Row index to display (0-based). An out-of-range value is
            reported via ``gr.Info`` and clamped back to row 0.
        model_ans: File name of the answer spreadsheet inside ``model_ans/``.

    Returns:
        A list of three ``gr.Label`` components: ID, question and answer.
    """
    df_temp = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    # Validate the index BEFORE touching the row: the original order
    # (index first, check second) raised KeyError for out-of-range moves,
    # and its reset to 0 never affected the already-computed values.
    move = int(move)
    if move >= len(df_temp):
        gr.Info(f"Number of questions: {len(df_temp)}")
        move = 0
    row = df_temp.loc[move]
    return [
        gr.Label(value=str(int(row['id'])), label="ID"),
        gr.Label(value=row['question'], label="Question"),
        gr.Label(value=row['answer'], label="Answer"),
    ]
|
| 19 |
+
def display_table(path=r"data/demo_table_data.xlsx"):
    """Render the first two rows of an Excel sheet as a scrollable HTML table.

    Args:
        path: Spreadsheet to preview; defaults to the demo table data.

    Returns:
        An HTML snippet wrapping the table in a horizontally scrollable div.
    """
    preview = pd.read_excel(path).head(2)
    table_html = preview.to_html(index=False)
    return "<div style='overflow-x:auto;'>" + table_html + "</div>"
|
| 25 |
+
def current_time():
    """Return the current local time formatted as ``YYYY_MM_DD_HH_MM_SS``.

    Used to build unique, sortable file names for saved reports.
    """
    now = datetime.datetime.now()
    return now.strftime("%Y_%m_%d_%H_%M_%S")
|
| 31 |
+
# This function is used in human evaluation
|
| 32 |
+
def random_ques_ans2():
    """Pick a random question from the existing dataset for human evaluation.

    Returns:
        A ``(question, answer)`` tuple where the answer is always the empty
        string, so the human evaluator fills it in.
    """
    import random
    df = pd.read_excel(r"data/existing_dataset.xlsx")
    # randrange excludes the upper bound; the original randint(0, len(df))
    # could pick len(df), one past the last row, raising KeyError.
    row = random.randrange(len(df))
    ques_temp = df.loc[row]['question']
    return ques_temp, ""
|
| 40 |
+
def _model_label(filename):
    """Turn a score-report file name into a human-readable model label.

    Keeps only lowercase letters, turning every other character that follows
    the first letter into a space, then strips boilerplate tokens
    ("model ans", "finetuned") and the "xlsx" extension.
    """
    chars = []
    seen_letter = False
    for ch in filename:
        if 'a' <= ch <= 'z':
            seen_letter = True
            chars.append(ch)
        elif seen_letter:
            chars.append(" ")
    label = ''.join(chars)
    label = label.replace("model ans", "")
    label = label.replace("finetuned", "")
    label = label.replace("  ", " ")
    label = label.replace("xlsx", "")
    return label


def score_report_bar():
    """Aggregate human-evaluation ratings per model.

    Reads every spreadsheet in ``score_report/`` and averages its ``rating``
    column. (The redundant local ``import os`` and unused ``import math``
    from the original were removed; ``os`` is imported at module level.)

    Returns:
        A DataFrame with ``Model Name`` and ``Average Rating`` columns,
        one row per score-report file.
    """
    path = "score_report"
    dat = []
    for fname in os.listdir(path):
        df_temp = pd.read_excel(os.path.join(path, fname))
        rating = sum(df_temp["rating"]) / len(df_temp)
        dat.append({
            "Model Name": _model_label(fname),
            "Average Rating": rating,
        })
    return pd.DataFrame(dat)
|
| 67 |
+
def parse_data(link,progress):
    """Crawl *link* and save the collected page text to a .docx file.

    Follows anchor links found on each fetched page for roughly
    ``duration`` seconds, then downloads every discovered URL with
    LangChain's WebBaseLoader and writes the cleaned, concatenated text
    to ``rag_data/<link>.docx`` for later RAG ingestion.

    Args:
        link: Root URL to start crawling from.
        progress: Gradio progress object; its ``tqdm`` wraps the
            per-page download loop.
    """
    from bs4 import BeautifulSoup
    import requests
    import re
    from docx import Document
    from langchain_community.document_loaders import WebBaseLoader
    # Shared set of every unique absolute URL discovered during the crawl.
    s=set()
    import time
    start_time = time.time()
    # Crawl time budget in seconds; recursion stops once it is exceeded.
    duration = 5
    def get_links(url):
        # Fetch *url* and return all absolute http(s) anchor targets,
        # recording each one in the shared set `s` as a side effect.
        response = requests.get(url)
        data = response.text
        soup = BeautifulSoup(data, 'lxml')

        links = []
        for link in soup.find_all('a'):
            link_url = link.get('href')
            if link_url is not None and link_url.startswith('http'):
                s.add(link_url)
                links.append(link_url)

        return links
    # def write_to_file(links):
    #     with open('data.txt', 'a') as f:
    #         f.writelines(links)
    def get_all_links(url):
        # Depth-first crawl bounded only by the elapsed-time check.
        # NOTE(review): there is no visited-set check here, so the same
        # page may be fetched repeatedly until the time budget runs out.
        for link in get_links(url):
            if (time.time() - start_time) >= duration:
                return
            get_all_links(link)

    def data_ret2(link):
        # Download a single page and return its extracted text content.
        loader = WebBaseLoader(f"{link}")
        data = loader.load()
        return data[0].page_content
    # link = 'https://kuet.ac.bd'
    s.add(link)
    get_all_links(link)
    li=list(s)
    all_data=[]
    for x in progress.tqdm(li):
        try:
            print("Link: ",x)
            all_data.append(data_ret2(x))
        except:
            # Best-effort crawl: skip any page that fails to download.
            print("pass")
            continue
    # Collapse runs of newlines, then strip characters outside the XML 1.0
    # valid range (python-docx cannot serialize them).
    all_data2 = re.sub(r'\n+', '\n\n', "\n".join(all_data))
    all_data2=re.sub(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+', '', all_data2)
    document = Document()
    document.add_paragraph(all_data2)
    # NOTE(review): *link* is a full URL; embedding it in the file name
    # yields a path containing "https://..." — confirm this is intended.
    document.save(f'rag_data/{link}.docx')
    print("Finished!!")
    return
|
| 122 |
+
def all_contri_ans(id, ques):
    """Collect every contributed answer for a given question.

    Scans all spreadsheets in ``save_ques_ans/`` and gathers the answers
    whose question text matches *ques* exactly.

    Args:
        id: Unused; kept for interface compatibility with existing callers.
        ques: Question text to match.

    Returns:
        List of matching answers, or a one-item placeholder list when no
        answer is available.
    """
    folder_path = 'save_ques_ans'
    data_frames = []
    for filename in os.listdir(folder_path):
        # str.endswith accepts a tuple of suffixes.
        if filename.endswith((".xlsx", ".xls")):
            file_path = os.path.join(folder_path, filename)
            data_frames.append(pd.read_excel(file_path))

    # pd.concat raises ValueError on an empty list; treat "no files" the
    # same as "no matching answers" instead of crashing.
    if not data_frames:
        return ["This question's answer is not available."]
    df_hum = pd.concat(data_frames, ignore_index=True)
    temp = [y for x, y in zip(df_hum['question'], df_hum['answer']) if x == ques]
    if not temp:
        temp = ["This question's answer is not available."]
    return temp
|
| 139 |
+
import json
|
| 140 |
+
import os
|
| 141 |
+
|
| 142 |
+
def save_params_to_file(model_name, embedding_name, splitter_type_dropdown, chunk_size_slider,
                        chunk_overlap_slider, separator_textbox, max_tokens_slider, filename="params.txt"):
    """Persist the retriever/LLM settings as JSON.

    The parameters are written both to *filename* (read by this app) and to
    ``deploy/params.txt`` (read by the standalone deployment scripts).

    Args:
        model_name: Selected LLM name.
        embedding_name: Selected embedding model name.
        splitter_type_dropdown: Text-splitter type.
        chunk_size_slider: Chunk size for splitting.
        chunk_overlap_slider: Overlap between chunks.
        separator_textbox: Separator string for the splitter.
        max_tokens_slider: Generation token limit.
        filename: Primary output path (default ``params.txt``).
    """
    params = {
        "model_name": model_name,
        "embedding_name": embedding_name,
        "splitter_type_dropdown": splitter_type_dropdown,
        "chunk_size_slider": chunk_size_slider,
        "chunk_overlap_slider": chunk_overlap_slider,
        "separator_textbox": separator_textbox,
        "max_tokens_slider": max_tokens_slider,
    }
    # Write the same payload to both locations; os.path.join replaces the
    # original hard-coded "deploy//params.txt" double separator, and the
    # loop removes the duplicated open/dump code.
    for target in (filename, os.path.join("deploy", "params.txt")):
        with open(target, 'w') as f:
            json.dump(params, f)
|
| 158 |
+
|
| 159 |
+
def load_params_from_file(filename="params.txt"):
    """Load previously saved settings.

    Args:
        filename: JSON file produced by ``save_params_to_file``.

    Returns:
        The parameter dict, or ``None`` when the file does not exist.
    """
    # EAFP: attempting the open avoids the exists-then-open race of the
    # original os.path.exists check.
    try:
        with open(filename, 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        return None
|