Nelly43 commited on
Commit
0c6d13f
·
1 Parent(s): 8c0848a

Update app

Browse files
app.py ADDED
@@ -0,0 +1,1768 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import os
4
+ from pathlib import Path
5
+ import random
6
+ from utils import display_table,current_time,random_ques_ans2,move_to,score_report_bar,all_contri_ans
7
+ from inference import model_chain
8
+ import warnings
9
+ from huggingface_hub import snapshot_download
10
+
11
# --- One-time application setup -------------------------------------------
# Pull the reference RAG dataset from the Hub into ./rag_data.
# Requires the HF_TOKEN environment variable for the private dataset.
snapshot_download(repo_id="CGIAR/weai-ref",
                  repo_type="dataset",
                  token=os.getenv('HF_TOKEN'),
                  local_dir='./rag_data'
                  )
warnings.filterwarnings('ignore')
os.environ["WANDB_DISABLED"] = "true"  # keep transformers' Trainer from logging to W&B
# NOTE(review): a `global` statement at module scope is a no-op — `cnt` is
# already a module-level name. Kept as-is; functions below declare
# `global cnt` themselves, which is what actually matters.
global cnt
cnt=1                   # cursor into the model-answer spreadsheet (see random_ques_ans)
data=[]                 # accumulated {question, answer, rating} dicts for the score report
save_ques_ans=[]
save_ques_ans_test=[]
cur_time=current_time() # app start timestamp, used to name report files

from huggingface_hub import HfApi
# Initialize the Hugging Face API client
api = HfApi()

# Specify the organization where Llama models are hosted
organization = "meta-llama"

# List all models belonging to the specified organization
# This will return a list of ModelInfo objects
llama_models = [model.modelId for model in api.list_models(author=organization)
                if 'chat' in model.modelId]
36
+
37
def random_ques_ans(model_ans):
    """Return the next (question, answer, id, has_more) tuple for a model.

    Reads the spreadsheet ``model_ans/<model_ans>`` and advances the
    module-level cursor ``cnt`` by one.  ``has_more`` is 0 when the cursor
    wrapped back to the start of the sheet, 1 otherwise.
    """
    global cnt
    sheet = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    row = sheet.loc[cnt]
    row_id = int(row['id'])
    question = row['question']
    answer = row['answer']
    cnt += 1
    if cnt >= len(sheet):
        cnt = 0
        return question, answer, row_id, 0
    return question, answer, row_id, 1
48
def save_all(model_ans):
    """Persist every collected rating to one Excel report.

    The report goes into the ``score_report`` folder, named after the model
    file plus the application start time.
    """
    report = pd.DataFrame(data)
    # Original used a literal backslash (f"score_report\\...") which only
    # works on Windows; os.path.join is portable.  Also make sure the
    # target folder exists so to_excel doesn't fail on a fresh checkout.
    os.makedirs("score_report", exist_ok=True)
    report.to_excel(os.path.join("score_report", f"{model_ans + cur_time}.xlsx"), index=False)
    gr.Info("Successfully saved all the answers!!!")
52
+
53
def score_save(ques, ans, score, model_ans, token_key):
    """Record one rating, write the running report, and load the next QA pair.

    Args:
        ques: Question that was shown to the rater.
        ans: Model answer that was rated.
        score: The rating given by the user.
        model_ans: Name of the model-answer spreadsheet being reviewed.
        token_key: Unused here; kept for interface compatibility with the UI wiring.

    Returns:
        Three gr.Label updates: the next question's id, text and answer.
    """
    data.append({
        "question": ques,
        'answer': ans,
        'rating': score
    })
    report = pd.DataFrame(data)
    # Portable path (the original hard-coded a Windows backslash) and make
    # sure the report directory exists before writing.
    os.makedirs("score_report", exist_ok=True)
    report.to_excel(os.path.join("score_report", f"{model_ans + cur_time}.xlsx"), index=False)
    gr.Info("Successfully saved in local folder!!!")
    ques_temp, ans_temp, qid, flag = random_ques_ans(model_ans)
    gr.Info("Your opinion is submitted successfully!!!")
    return (gr.Label(value=qid, label="ID"),
            gr.Label(value=ques_temp, label="Question"),
            gr.Label(value=ans_temp, label="Answer"))
66
+
67
def new_ques(model_ans):
    # Fetch a fresh QA pair and push it into the three display components.
    # NOTE(review): the first dict key is the *builtin* ``id`` — no gradio
    # component bound to the name ``id`` is visible in this file.  Confirm
    # such a component exists at call time; otherwise this key targets the
    # wrong object and the ID label will not update.
    ques_temp,ans_temp,id2,flag=random_ques_ans(model_ans)
    return {
        id:gr.Label(value=id2,label="ID"),
        ques:gr.Label(value=ques_temp,label="Question"),
        ans:gr.Label(value=ans_temp,label="Answer")
    }
74
+
75
def save_the_ques(ques, ans, file_type='xlsx'):
    """
    Saves a question and answer pair to a specified file (xlsx or csv).

    Args:
        ques (str): The question.
        ans (str): The answer.
        file_type (str, optional): The file type to save to ("xlsx" or "csv").
            UI dropdown values like "Save xlsx" are also accepted.
            Defaults to "xlsx".

    Returns:
        gr.Label: A visible success label.
    """
    # BUG FIX: the dropdown wired to this handler sends "Save xlsx" /
    # "Save csv".  The original compared that raw value against "xlsx" /
    # "csv", so neither write branch ever ran and nothing was saved.
    # Normalise to the bare extension before using it.
    file_type = file_type.split()[-1].lower() if file_type else "xlsx"

    new_data = {"question": [ques], "answer": [ans]}
    df_new = pd.DataFrame(new_data)

    filepath = f"data/finetune_data.{file_type}"

    # Append to the existing sheet when present, otherwise start a new one.
    if Path(filepath).is_file():
        df_existing = pd.read_excel(filepath) if file_type == "xlsx" else pd.read_csv(filepath)
        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    else:
        df_combined = df_new

    if file_type == "xlsx":
        df_combined.to_excel(filepath, index=False)
    elif file_type == "csv":
        df_combined.to_csv(filepath, index=False)

    return gr.Label(value="Successfully saved in local folder.", visible=True)
106
+
107
def save_the_ques_test(ques, ans, file_type='xlsx'):
    """
    Saves a testing question/ground-truth pair to data/testing_data.(xlsx|csv).

    Args:
        ques (str): The question.
        ans (str): The answer (ground truth).
        file_type (str, optional): The file type to save to ("xlsx" or "csv").
            UI dropdown values like "Save xlsx" are also accepted.
            Defaults to "xlsx".

    Returns:
        gr.Label: A visible success label.

    NOTE(review): this writes ``testing_data.*`` while the answer-generation
    tab reads ``data/testing_dataset.xlsx`` — confirm the intended filename.
    """
    # BUG FIX: normalise dropdown values ("Save xlsx"/"Save csv") to the
    # bare extension; the original comparison never matched them, so the
    # file was never written.
    file_type = file_type.split()[-1].lower() if file_type else "xlsx"

    new_data = {"question": [ques], "answer": [ans]}
    df_new = pd.DataFrame(new_data)

    filepath = f"data/testing_data.{file_type}"

    # Append when the file already exists, otherwise create it.
    if Path(filepath).is_file():
        df_existing = pd.read_excel(filepath) if file_type == "xlsx" else pd.read_csv(filepath)
        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    else:
        df_combined = df_new

    if file_type == "xlsx":
        df_combined.to_excel(filepath, index=False)
    elif file_type == "csv":
        df_combined.to_csv(filepath, index=False)

    return gr.Label(value="Successfully saved in local folder.", visible=True)
138
+
139
+ import pandas as pd
140
+ from pathlib import Path
141
+
142
def save_emb_data(loss_function, first_input, second_input, third_input, file_type="xlsx"):
    """
    Saves embedding data based on the specified loss function to either an
    Excel file (xlsx) or a CSV file (csv) at data/emb_data.<ext>.

    Args:
        loss_function (str): The name of the loss function.
        first_input: The first input data.
        second_input: The second input data.
        third_input: The third input data.
        file_type (str, optional): The file type to save to ("xlsx" or "csv").
            UI dropdown values like "Save xlsx" are also accepted.
            Defaults to "xlsx".

    Returns:
        str: A message indicating whether data was appended, a new file was
        created, or the loss function was not recognised.
    """
    # Column layout expected by each supported loss function (replaces the
    # original five copy-pasted if/elif branches).
    column_map = {
        "MultipleNegativesRankingLoss": ("anchor", "positive", "negative"),
        "OnlineContrastiveLoss": ("sentence1", "sentence2", "label"),
        "CoSENTLoss": ("sentence1", "sentence2", "score"),
        "GISTEmbedLoss": ("anchor", "positive", "negative"),
        "TripletLoss": ("anchor", "positive", "negative"),
    }
    # BUG FIX: an unrecognised loss function left `data` unbound and crashed
    # with NameError; report it instead.
    if loss_function not in column_map:
        return f"Unknown loss function: {loss_function}"

    c1, c2, c3 = column_map[loss_function]
    data = pd.DataFrame({c1: [first_input], c2: [second_input], c3: [third_input]})

    # BUG FIX: the dropdown sends "Save xlsx"/"Save csv"; normalise to the
    # bare extension so the comparisons below actually match.
    file_type = file_type.split()[-1].lower() if file_type else "xlsx"

    # Make sure the target directory exists on a fresh checkout.
    Path("data").mkdir(exist_ok=True)
    filepath = f"data/emb_data.{file_type}"

    try:
        if file_type == "xlsx":
            existing_data = pd.read_excel(filepath)
        elif file_type == "csv":
            existing_data = pd.read_csv(filepath)

        if list(data.columns) == list(existing_data.columns):
            combined_data = pd.concat([existing_data, data], ignore_index=True)
            if file_type == "xlsx":
                combined_data.to_excel(filepath, index=False)
            elif file_type == "csv":
                combined_data.to_csv(filepath, index=False)
            return "Data appended to existing file!"
        else:
            # Column mismatch: the stored file belongs to a different loss
            # function, so overwrite it (original behaviour).
            if file_type == "xlsx":
                data.to_excel(filepath, index=False)
            elif file_type == "csv":
                data.to_csv(filepath, index=False)
            return "Data saved to a new file (overwritten)!"

    except FileNotFoundError:
        if file_type == "xlsx":
            data.to_excel(filepath, index=False)
        elif file_type == "csv":
            data.to_csv(filepath, index=False)
        return "Data saved to a new file!"
219
+
220
def parse_data_func(link_temp,progress=gr.Progress()):
    """Parse the given website link into RAG data, with a progress bar.

    Args:
        link_temp (str): URL to scrape; forwarded to utils.parse_data.
        progress: Gradio progress tracker (the default-argument gr.Progress()
            is the standard gradio idiom for progress injection).
    """
    progress(0, desc="Starting...")
    # parse_data is imported at module level further down (inside the Blocks
    # section) before this handler can be triggered.
    parse_data(link_temp,progress)
    gr.Info("Finished parsing!! Save as a docx file.")
224
+
225
def next_ques(ques, ans):
    """Draw another random question for the 'Existing Questions' tab.

    The current question/answer arguments are ignored; only a new question
    label is produced (the answer box is left untouched).
    """
    question_text, _answer_unused = random_ques_ans2()
    return gr.Label(value=question_text)
228
+
229
+ with gr.Blocks(title="LLM QA Chatbot Builder",theme=gr.themes.Soft()) as demo:
230
+ gr.Markdown("""
231
+ # LLM QA Chatbot Builder
232
+ """)
233
+ with gr.Tab("Data Collection"):
234
+ gr.Markdown(""" # Instructions:
235
+ In this page you can prepare data for LLM fine-tuning, testing and embedding model finetuning your model. The data can be provided through Excel file or CSV file or directly via web interface. Additionally, data can be parsed from the target website (Data parsing for RAG) to further enhance the model performance.
236
+
237
+ ## 1. If you want to provide data in Excel file or CSV file for model fine-tuning and testing.
238
+ - Create an Excel or CSV file in the data folder and name it `finetune_data.xlsx` or `finetune_data.csv` for finetuning the model.
239
+ - Create an Excel or CSV file in the data folder and name it `testing_data.xlsx` or `testing_data.csv` for generating answers using the fine-tuned model.
240
+ - `finetune_data.xlsx` or `finetune_data.csv` has two columns: `question` and `answer`. `testing_data.xlsx` or `testing_data.csv` has three columns: `question`, `ground_truth` ,`context`.
241
+ """)
242
+ gr.Markdown("""
243
+ ## `finetune_data.xlsx` | `finetune_data.csv`
244
+ """)
245
+ gr.HTML(value=display_table(), label="finetune_data.xlsx or finetune_data.csv")
246
+ gr.Markdown("""
247
+ ## `testing_data.xlsx` | `testing_data.csv`
248
+ """)
249
+ gr.HTML(value=display_table("data/demo_test_data.xlsx"), label="testing_data.xlsx or testing_data.csv")
250
+ gr.Markdown("""
251
+ ## 2. You can use the below interface to create the dataset for training and testing models.
252
+ """)
253
+
254
+ #Training data generation
255
+ with gr.Tab("Training Data Generation"):
256
+ with gr.Tab("Existing Questions"):
257
+ gr.Markdown("""
258
+ Existing questions are provided by the administrator and placed in the data folder named `existing_dataset.xlsx`. This file has only one column: `question`.
259
+ After clicking the `Save the Answer` button. Those questions and answers are saved in the `data` folder as a `finetune_data.xlsx` file.
260
+ """)
261
+ ques_temp,ans_temp=random_ques_ans2()
262
+ with gr.Row():
263
+ ques=gr.Label(value=ques_temp,label="Question")
264
+ with gr.Row():
265
+ ans=gr.TextArea(label="Answer")
266
+ with gr.Row():
267
+ with gr.Row():
268
+ type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
269
+ save_training = gr.Button(value="Save")
270
+ question = gr.Button("Generate New Question")
271
+ with gr.Row():
272
+ lab=gr.Label(visible=False)
273
+ question.click(next_ques,None,ques)
274
+ save_training.click(save_the_ques,[ques,ans,type_options],lab)
275
+
276
+ with gr.Tab("Custom Questions"):
277
+ gr.Markdown("""
278
+ After clicking the `save the answer` button. Those questions and answers are saved in the `data` folder as a `finetune_data.xlsx` file.
279
+ """)
280
+ with gr.Row():
281
+ ques=gr.Textbox(label="Question")
282
+ with gr.Row():
283
+ ans=gr.TextArea(label="Answer")
284
+ with gr.Row():
285
+ with gr.Row():
286
+ type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
287
+ save_training = gr.Button(value="Save")
288
+ with gr.Row():
289
+ lab=gr.Label(visible=False,value="You answer is submitted!!! Thank you for your contribution.",label="Submitted")
290
+ save_training.click(save_the_ques,[ques,ans,type_options],lab)
291
+
292
+ ### Testing data generation
293
+ with gr.Tab("Testing Data Generation"):
294
+ gr.Markdown("""
295
+ You can create test data for generating answers using the fine-tune model, which will be used for testing the model's performance.
296
+ After clicking the `Save the Answer` button. Those questions and answers are saved in the `data` folder as a `testing_data.xlsx` file.
297
+ """)
298
+ with gr.Row():
299
+ ques=gr.Textbox(label="Question")
300
+ with gr.Row():
301
+ ans=gr.TextArea(label="Ground Truth")
302
+ with gr.Row():
303
+ ans=gr.TextArea(label="Contexts")
304
+ with gr.Row():
305
+ with gr.Row():
306
+ type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
307
+ save_test = gr.Button(value="Save")
308
+ with gr.Row():
309
+ lab=gr.Label(visible=False,value="You answer is submitted!!! Thank you for your contribution.",label="Submitted")
310
+ save_test.click(save_the_ques_test,[ques,ans,type_options],None)
311
+
312
+ ## Embedding data generation
313
def update_fields(loss_function):
    """Reveal the three data-entry boxes and help text for a loss function.

    Returns (first_input, second_input, third_input, markdown) gradio
    component updates; all hidden when the loss function is unknown.
    Replaces five near-identical if/elif branches with one lookup table.
    """
    # (label, placeholder) triples plus the help markdown, per loss function.
    specs = {
        "MultipleNegativesRankingLoss": (
            ("Anchor", "The sentence to be embedded."),
            ("Positive", "A sentence semantically similar to the anchor."),
            ("Negative", "A sentence semantically dissimilar to the anchor."),
            """
            **MultipleNegativesRankingLoss:**
            Expects data with columns: `anchor`, `positive`, `negative`.
            - `anchor`: The sentence to be embedded.
            - `positive`: A sentence semantically similar to the anchor.
            - `negative`: A sentence semantically dissimilar to the anchor.""",
        ),
        "OnlineContrastiveLoss": (
            ("Sentence 1", "The first sentence."),
            ("Sentence 2", "The second sentence."),
            ("Label", "1 if the sentences are similar, 0 if dissimilar."),
            """
            **OnlineContrastiveLoss:**
            Expects data with columns: `sentence1`, `sentence2`, `label`.
            - `sentence1`, `sentence2`: Pairs of sentences.
            - `label`: 1 if the sentences are similar, 0 if dissimilar.""",
        ),
        "CoSENTLoss": (
            ("Sentence 1", "The first sentence."),
            ("Sentence 2", "The second sentence."),
            ("Score", "A float value (e.g., 0-1) representing their similarity."),
            """
            **CoSENTLoss:**
            Expects data with columns: `sentence1`, `sentence2`, `score`.
            - `sentence1`, `sentence2`: Pairs of sentences.
            - `score`: A float value (e.g., 0-1) representing their similarity.""",
        ),
        "GISTEmbedLoss": (
            ("Anchor", "The sentence to be embedded."),
            ("Positive", "A sentence semantically similar to the anchor."),
            ("Negative", "A sentence semantically dissimilar to the anchor. Can be empty."),
            """
            **GISTEmbedLoss:**
            Expects data with either:
            - Columns: `anchor`, `positive`, `negative` (like TripletLoss).
            - Columns: `anchor`, `positive` (for pairs of similar sentences).""",
        ),
        "TripletLoss": (
            ("Anchor", "The sentence to be embedded."),
            ("Positive", "A sentence semantically similar to the anchor."),
            ("Negative", "A sentence semantically dissimilar to the anchor."),
            """
            **TripletLoss:**
            Expects data with columns: `anchor`, `positive`, `negative`.
            - `anchor`: The sentence to be embedded.
            - `positive`: A sentence semantically similar to the anchor.
            - `negative`: A sentence semantically dissimilar to the anchor.""",
        ),
    }

    if loss_function not in specs:
        # Hide everything for unknown / cleared selections (original behaviour).
        return (gr.Textbox(visible=False), gr.Textbox(visible=False),
                gr.Textbox(visible=False), gr.Markdown(visible=False))

    (l1, p1), (l2, p2), (l3, p3), help_md = specs[loss_function]
    first_input = gr.Textbox(label=l1, visible=True, placeholder=p1)
    second_input = gr.Textbox(label=l2, visible=True, placeholder=p2)
    third_input = gr.Textbox(label=l3, visible=True, placeholder=p3)
    markdown = gr.Markdown(help_md, visible=True)
    return first_input, second_input, third_input, markdown
383
+
384
+ with gr.Tab("Embedding Data Generation"):
385
+ gr.Markdown("**Choose a loss function to format your embedding data.**")
386
+ with gr.Row():
387
+ loss_function = gr.Dropdown(
388
+ choices=[
389
+ "MultipleNegativesRankingLoss",
390
+ "OnlineContrastiveLoss",
391
+ "CoSENTLoss",
392
+ "GISTEmbedLoss",
393
+ "TripletLoss",
394
+ ],
395
+ label="Select the loss function",
396
+ )
397
+ with gr.Row():
398
+ gr.Markdown("""Format `data/emb_data.xlsx` or `data/emb_data.csv` to the expected data format, according to the selected loss function.
399
+ If the file exists and has matching columns, new data will be appended.
400
+ Otherwise, the file will be overwritten.""")
401
+ with gr.Row():
402
+ loss_info_markdown = gr.Markdown(visible=False)
403
+ with gr.Row():
404
+ first_input = gr.Textbox(label="Anchor", value="",visible=False)
405
+ second_input = gr.Textbox(label="Positive", value="",visible=False)
406
+ third_input = gr.Textbox(label="Negative", value="",visible=False)
407
+ loss_function.change(update_fields, loss_function, [first_input, second_input, third_input,loss_info_markdown])
408
+ with gr.Row():
409
+ with gr.Row():
410
+ type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
411
+ save_emb = gr.Button(value="Save")
412
+ save_emb.click(save_emb_data,[loss_function,first_input,second_input,third_input,type_options])
413
+
414
+ with gr.Row():
415
+ gr.Markdown("""
416
+ ## 3. Data parsing for RAG
417
+ """)
418
+ with gr.Row():
419
+ link_temp=gr.Textbox(label="Enter Link to Parse Data for RAG",info="To provide the link for parsing the data from the website, this link can help create RAG data for the model.")
420
+ parse_data_btn=gr.Button("Parse Data")
421
+ from utils import parse_data
422
+ parse_data_btn.click(parse_data_func,link_temp,link_temp)
423
+
424
+ #***************************************************
425
+ with gr.Tab("Fine-tuning"):
426
+ with gr.Tab("Fine-tune LLM"):
427
+ with gr.Row():
428
def login_hug(token):
    # Authenticate this process against the Hugging Face Hub so gated
    # repositories (e.g. meta-llama models) can be downloaded.
    from huggingface_hub import login
    login(token=token)
431
+ login_hug(os.getenv('HF_TOKEN'))
432
+ gr.Markdown("""
433
+ # Instructions:
434
+ - Required VRAM for training: 24GB, for inference: 16GB.(Mistral, Zepyhr and Lllama)\n
435
+ - Required VRAM for training: 5GB, for inference: 4GB.(Phi,Flan-T5)
436
+ - For fine-tuning a custom model select `custom model` option in `Select the model for fine-tuning` dropdown section. The custom model can be configured by editing the code section.\n
437
+ - After fine-tuning the model, it will be saved in the `models` folder.
438
+ """)
439
+
440
def edit_model_parameter(model_name_temp,edit_code,code_temp,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout, progress=gr.Progress()):
    """Launch fine-tuning for the selected model with the given hyperparameters.

    If the user edited the code in the UI (edit_code truthy and code_temp
    non-empty), the edited script is first written back to the matching file
    under fine_tune_file/, then the trainer modules are imported so the
    just-saved code is what actually runs.

    Args:
        model_name_temp: One of "Mistral", "Zephyr", "Llama", "Phi",
            "Flant5", or "Custom model".
        edit_code: The "Advance Code Editing" button value (truthy when used).
        code_temp: Possibly-edited script text from the code box.
        lr, epoch, batch_size, gradient_accumulation: Trainer hyperparameters.
        quantization: Bit width for weight quantization (4 or 8).
        lora_r, lora_alpha, lora_dropout: LoRA adapter hyperparameters.
        progress: Gradio progress tracker (injected default).
    """
    progress(0, desc="Fine-tune started!! please wait ...")
    # write code to files if code was edited
    if edit_code and len(code_temp)!=0:
        if model_name_temp=="Mistral":
            open(r"fine_tune_file/mistral_finetune.py","w").write(code_temp)
        elif model_name_temp=="Zephyr":
            open(r"fine_tune_file/zepyhr_finetune.py","w").write(code_temp)
        elif model_name_temp=="Llama":
            open(r"fine_tune_file/llama_finetune.py","w").write(code_temp)
        elif model_name_temp=="Phi":
            open(r"fine_tune_file/phi_finetune.py","w").write(code_temp)
        elif model_name_temp=="Custom model":
            open(r"fine_tune_file/finetune_file.py","w").write(code_temp)
    # importing just before finetuning, to ensure the latest code is used
    # from fine_tune_file.mistral_finetune import mistral_trainer
    # from fine_tune_file.zepyhr_finetune import zephyr_trainer
    # from fine_tune_file.llama_finetune import llama_trainer
    # from fine_tune_file.phi_finetune import phi_trainer
    from fine_tune_file.finetune_file import custom_model_trainer
    # from fine_tune_file.flant5_finetune import flant5_trainer
    from fine_tune_file.modular_finetune import get_trainer
    # create instance of the finetuning classes and then call the finetune function

    if model_name_temp=="Custom model":
        gr.Info("Fine-tune started!!!")
        trainer=custom_model_trainer()
        trainer.custom_model_finetune()
        gr.Info("Fine-tune Ended!!!")
    else:
        # get_trainer resolves the per-model trainer class by name.
        trainer=get_trainer(model_name_temp)
        gr.Info("Fine-tune started!!!")
        if model_name_temp=="Mistral":
            trainer.mistral_finetune(lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout)
        elif model_name_temp=="Zephyr":
            trainer.zepyhr_finetune(lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout)
        elif model_name_temp=="Llama":
            trainer.llama_finetune(lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout)
        elif model_name_temp=="Phi":
            trainer.phi_finetune(lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout)
        elif model_name_temp=="Flant5":
            trainer.flant5_finetune(lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout)
        gr.Info("Fine-tune Ended!!!")
483
+
484
def code_show(model_name):
    """Load the fine-tuning script for *model_name* into an editable code box.

    Returns None (no update) for unrecognised names, matching the original
    implicit fall-through.
    """
    # One lookup table instead of five copy-pasted branches.
    script_paths = {
        "Mistral": r"fine_tune_file/mistral_finetune.py",
        "Zephyr": r"fine_tune_file/zepyhr_finetune.py",
        "Llama": r"fine_tune_file/llama_finetune.py",
        "Phi": r"fine_tune_file/phi_finetune.py",
        "Flant5": r"fine_tune_file/flant5_finetune.py",
    }
    path = script_paths.get(model_name)
    if path is None:
        return None
    # FIX: the original used open(...).read(), leaking the file handle.
    with open(path) as fh:
        source = fh.read()
    return gr.Code(visible=True, value=source, interactive=True, language="python")
500
+
501
def custom_model(model_name): # It shows custom model code in the UI.
    """Toggle the inline editor: show finetune_file.py when 'Custom model'
    is selected, otherwise hide the editor and restore the edit button."""
    if model_name == "Custom model":
        # FIX: use a context manager instead of open(...).read(), which
        # leaked the file handle.
        with open(r"fine_tune_file/finetune_file.py") as fh:
            source = fh.read()
        return [gr.Code(visible=True, value=source, interactive=True, language="python"),
                gr.Button(visible=False)]
    else:
        return [gr.Code(visible=False), gr.Button("Advance Code Editing", visible=True)]
507
def change_code_fun(code_, model_name):
    """Persist edited fine-tuning code back to the model's script file.

    Unknown model names do nothing (original behaviour: silent fall-through).
    """
    script_paths = {
        "Mistral": r"fine_tune_file/mistral_finetune.py",
        "Zephyr": r"fine_tune_file/zepyhr_finetune.py",
        "Llama": r"fine_tune_file/llama_finetune.py",
        "Phi": r"fine_tune_file/phi_finetune.py",
        "Flant5": r"fine_tune_file/flant5_finetune.py",
    }
    path = script_paths.get(model_name)
    if path is None:
        return
    # FIX: the original wrote via open(...).write(...) without closing,
    # so the write was not guaranteed to be flushed promptly.
    with open(path, "w") as fh:
        fh.write(code_)
    gr.Info("Successfully saved code!!!")
523
+
524
def finetune_emb(model_name, loss_name, epochs = 1, batch_size = 8):
    """Fine-tune the selected sentence-embedding model with the chosen loss.

    Imports the tuner lazily so the heavy dependencies load only when the
    button is actually clicked.
    """
    gr.Info("Embedding model fine-tune is started!!!")
    from embedding_tuner import EmbeddingFinetuner
    tuner = EmbeddingFinetuner(
        model_name=model_name,
        loss_function=loss_name,
        epochs=epochs,
        batch_size=batch_size,
    )
    if tuner.train():
        gr.Info("Embedding model fine-tune finished!!!")
536
+
537
+ with gr.Row():
538
+ code_temp=gr.Code(visible=False)
539
+ with gr.Row():
540
+ model_name=gr.Dropdown(choices=["Mistral","Zephyr","Llama","Phi","Flant5","Custom model"],label="Select the LLM for fine-tuning")
541
+ with gr.Accordion("Parameter Setup"):
542
+ with gr.Row():
543
+ lr=gr.Number(label="learning_rate",value=5e-6,interactive=True,info="The step size at which the model parameters are updated during training. It controls the magnitude of the updates to the model's weights.")
544
+ epoch=gr.Number(label="epochs",value=2,interactive=True,info="One complete pass through the entire training dataset during the training process. It's a measure of how many times the algorithm has seen the entire dataset.")
545
+ batch_size=gr.Number(label="batch_size",value=4,interactive=True,info="The number of training examples used in one iteration of training. It affects the speed and stability of the training process.")
546
+ gradient_accumulation = gr.Number(info="Gradient accumulation involves updating model weights after accumulating gradients over multiple batches, instead of after each individual batch.",label="gradient_accumulation",value=4,interactive=True)
547
+ with gr.Row():
548
+ quantization = gr.Dropdown(info="Quantization is a technique used to reduce the precision of numerical values, typically from 32-bit floating-point numbers to lower bit representations.",label="quantization",choices=[4,8],value=8,interactive=True)
549
+ lora_r = gr.Number(info="LoRA_r is a hyperparameter associated with the rank of the low-rank approximation used in LoRA.",label="lora_r",value=16,interactive=True)
550
+ lora_alpha = gr.Number(info="LoRA_alpha is a hyperparameter used in LoRA for controlling the strength of the adaptation.",label="lora_alpha",value=32,interactive=True)
551
+ lora_dropout = gr.Number(info="LoRA_dropout is a hyperparameter used in LoRA to control the dropout rate during fine-tuning.",label="lora_dropout",value=.05,interactive=True)
552
+ with gr.Row():
553
+ edit_code=gr.Button("Advance Code Editing")
554
+ with gr.Row():
555
+ code_temp=gr.Code(visible=False)
556
+ with gr.Row():
557
+ parameter_alter=gr.Button("Fine-tune")
558
+ with gr.Row():
559
+ fin_com=gr.Label(visible=False)
560
+ edit_code.click(code_show,model_name,code_temp)
561
+ # On click finetune button
562
+ parameter_alter.click(edit_model_parameter,[model_name,edit_code,code_temp,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout],model_name)
563
+ model_name.change(custom_model,model_name,[code_temp,edit_code])
564
+ with gr.Tab("Embedding model"):
565
+ with gr.Row():
566
+ embedding_model = gr.Dropdown(
567
+ choices=[
568
+ "BAAI/bge-base-en-v1.5",
569
+ "dunzhang/stella_en_1.5B_v5",
570
+ "dunzhang/stella_en_400M_v5",
571
+ "nvidia/NV-Embed-v2",
572
+ "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
573
+ ],
574
+ label="Select the embedding model for fine-tuning",
575
+ )
576
+ loss_function = gr.Dropdown(
577
+ choices=[
578
+ "MultipleNegativesRankingLoss",
579
+ "OnlineContrastiveLoss",
580
+ "CoSENTLoss",
581
+ "GISTEmbedLoss",
582
+ "TripletLoss",
583
+ ],
584
+ label="Select the loss function",
585
+ )
586
+
587
+ epoch=gr.Number(label="epochs",value=1,interactive=True,info="One complete pass through the entire training dataset during the training process.")
588
+ batch_size=gr.Number(label="batch_size",value=8,interactive=True,info="The number of training examples used in one iteration of training.")
589
+ with gr.Row():
590
+ btn_emb = gr.Button("Fine-tune the embedding model")
591
+
592
+
593
+ # with gr.Row():
594
+ # with gr.Accordion(label="Expected data format according to loss function"):
595
+ # loss_info = gr.Markdown(
596
+ # """
597
+ # # Expected data format according to loss function:
598
+ # ### Format `data/emb_data.xlsx` | `data/emb_data.xlsx` accordingly.
599
+
600
+ # **MultipleNegativesRankingLoss:**
601
+ # Expects data with columns: `anchor`, `positive`, `negative`.
602
+ # - `anchor`: The sentence to be embedded.
603
+ # - `positive`: A sentence semantically similar to the anchor.
604
+ # - `negative`: A sentence semantically dissimilar to the anchor.
605
+
606
+ # **OnlineContrastiveLoss:**
607
+ # Expects data with columns: `sentence1`, `sentence2`, `label`.
608
+ # - `sentence1`, `sentence2`: Pairs of sentences.
609
+ # - `label`: 1 if the sentences are similar, 0 if dissimilar.
610
+
611
+ # **CoSENTLoss:**
612
+ # Expects data with columns: `sentence1`, `sentence2`, `score`.
613
+ # - `sentence1`, `sentence2`: Pairs of sentences.
614
+ # - `score`: A float value (e.g., 0-1) representing their similarity.
615
+
616
+ # **GISTEmbedLoss:**
617
+ # Expects data with either:
618
+ # - Columns: `anchor`, `positive`, `negative` (like TripletLoss).
619
+ # - Columns: `anchor`, `positive` (for pairs of similar sentences).
620
+
621
+ # **TripletLoss:**
622
+ # Expects data with columns: `anchor`, `positive`, `negative`.
623
+ # - `anchor`: The sentence to be embedded.
624
+ # - `positive`: A sentence semantically similar to the anchor.
625
+ # - `negative`: A sentence semantically dissimilar to the anchor.
626
+ # """
627
+ # )
628
+
629
+
630
+
631
+ btn_emb.click(finetune_emb,[embedding_model, loss_function, epoch, batch_size], None)
632
+ #***************************************************
633
+ with gr.Tab("Testing Data Generation and RAG Customization"):
634
+ from utils import save_params_to_file
635
def ans_gen_fun(model_name_local, model_name_online, embedding_name,
                splitter_type_dropdown, chunk_size_slider,
                chunk_overlap_slider, separator_textbox, max_tokens_slider,
                save_as_fav, progress=gr.Progress()):
    """Answer every question in the testing dataset with the selected
    RAG configuration, compute RAG metrics, persist them, and return a
    bar plot of the results.

    Args:
        model_name_local: Fine-tuned model folder name (local mode).
        model_name_online: Hugging Face model id (online mode).
        embedding_name: Embedding model id for the retriever.
        splitter_type_dropdown, chunk_size_slider, chunk_overlap_slider,
        separator_textbox, max_tokens_slider: RAG chunking parameters.
        save_as_fav: When truthy, persist these parameters as favorites.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        gr.BarPlot summarising the run, or None when the testing dataset
        is missing.
    """
    if not os.path.exists(os.path.join("data", "testing_dataset.xlsx")):
        gr.Warning("You need to create testing dataset first from Data collection.")
        return
    if save_as_fav:
        save_params_to_file(model_name_local, embedding_name,
                            splitter_type_dropdown, chunk_size_slider,
                            chunk_overlap_slider, separator_textbox, max_tokens_slider)
    from model_ret import calculate_rag_metrics
    progress(0, desc="Starting...")
    model_ques_ans_gen = []
    df_temp = pd.read_excel(os.path.join("data", "testing_dataset.xlsx"))
    infer_model = model_chain(model_name_local, model_name_online,
                              True, embedding_name, splitter_type_dropdown, chunk_size_slider,
                              chunk_overlap_slider, separator_textbox, max_tokens_slider)
    print("Processing test dataset...")
    # Row layout assumed: column 0 = question, column 1 = ground truth
    # (matches the testing-dataset builder) -- TODO confirm against the sheet.
    for idx, row in enumerate(progress.tqdm(df_temp.values), start=1):
        model_ques_ans_gen.append({
            "id": idx,
            "question": row[0],
            "answer": infer_model.ans_ret(row[0]),
            "ground_truths": row[1],
        })
    print("Done processing test dataset!")
    model_name = infer_model.model_name.split('/')[-1]
    temp = calculate_rag_metrics(model_ques_ans_gen, model_name)
    pd.DataFrame(temp).to_excel(
        os.path.join("model_ans", f"_{model_name + cur_time}.xlsx"), index=False)
    rag_metrics = ['answer_correctness', 'answer_similarity', 'answer_relevancy',
                   'faithfulness', 'context_recall', 'context_precision']
    # NOTE(review): the plotted rating is a hard-coded placeholder (0.2),
    # not derived from `temp` -- confirm whether real averages should be used.
    new_df = pd.DataFrame({'Rag Metric': rag_metrics, 'Average Rating': 0.2})
    # BUG FIX: this notification previously appeared *after* the return
    # statement and therefore never executed.
    gr.Info("Generating answer from model is finished!!! Now, it is ready for human evaluation. Model answer is saved in \"model_ans\" folder. ")
    return gr.BarPlot(
        new_df,
        x="Rag Metric",
        y="Average Rating",
        x_title="Rag Metric",
        y_title="Average Rating",
        title="RAG performance",
        tooltip=["Rag Metric", "Average Rating"],
        y_lim=[1, 200],
        width=150,
        visible=True
    )
690
+
691
+ gr.Markdown(""" # Instructions:\n
692
+ In this page you can generate answer from fine-tuned models for human evaluation. The questions must be created using `Testing data generation` section of `Data collection` tab.
693
+ """)
694
+ with gr.Row():
695
+ embedding_name=gr.Dropdown(choices=["BAAI/bge-base-en-v1.5","dunzhang/stella_en_1.5B_v5","dunzhang/stella_en_400M_v5",
696
+ "nvidia/NV-Embed-v2","Alibaba-NLP/gte-Qwen2-1.5B-instruct"],
697
+ label="Select the Embedding Model")
698
+ splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
699
+ value="character", label="Splitter Type",interactive=True)
700
+
701
+ chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=500, step=50, label="Chunk Size")
702
+ chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=30, step=10, label="Chunk Overlap",interactive=True)
703
+ separator_textbox = gr.Textbox(value="\n", label="Separator (e.g., newline '\\n')",interactive=True)
704
+ max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=1000, step=100, label="Max Tokens",interactive=True)
705
+ with gr.Row():
706
+ save_as_fav=gr.Checkbox(label="Save this settings as favorite")
707
+ inf_checkbox=gr.Checkbox(label="Do you want to use without fine-tuned model from Hugging face?")
708
+ model_name_local=gr.Dropdown(visible=False)
709
+ model_name_online=gr.Dropdown(visible=False)
710
def model_online_local_show(inf_checkbox):
    """Toggle which model selector is visible: the Hugging Face dropdown
    when the checkbox is ticked, otherwise the local fine-tuned models."""
    if not inf_checkbox:
        local_dd = gr.Dropdown(choices=os.listdir("models"),
                               label="Select the fine-tuned LLM", visible=True)
        return [local_dd, gr.Dropdown(visible=False)]
    online_dd = gr.Dropdown(choices=llama_models,
                            label="Select the LLM from Huggingface", visible=True)
    return [gr.Dropdown(visible=False), online_dd]
718
+ inf_checkbox.change(model_online_local_show,[inf_checkbox],[model_name_local,model_name_online])
719
+
720
+ with gr.Row():
721
+ ans_gen=gr.Button("Generate Answer for Testing Dataset")
722
+ with gr.Row():
723
+ plot = gr.BarPlot(visible=False)
724
+ ans_gen.click(ans_gen_fun,[model_name_local,model_name_online,embedding_name,
725
+ splitter_type_dropdown,chunk_size_slider,
726
+ chunk_overlap_slider,separator_textbox,max_tokens_slider,save_as_fav],plot)
727
+ #***************************************************Human evaluation
728
+ import secrets
729
+
730
def generate_token():
    """Create a unique evaluator token plus an empty tracking spreadsheet.

    Loops until a hex token is found that does not collide with any file
    already present in ``save_ques_ans``, writes an empty id/question/answer
    sheet named after the token, and returns a visible label showing it.

    Returns:
        gr.Label displaying the freshly minted token.
    """
    while True:
        token = secrets.token_hex(6)
        # Compare against existing file basenames with their extensions
        # stripped (previously done with a brittle `[:-5]` slice).
        existing = [os.path.splitext(name)[0] for name in os.listdir("save_ques_ans")]
        if token not in existing:
            df = pd.DataFrame({'id': [], 'question': [], 'answer': []})
            # Portable path join instead of the old "save_ques_ans//" concat.
            df.to_excel(os.path.join("save_ques_ans", f"{token}.xlsx"), index=False)
            return gr.Label(label="Please keep the token for tracking question answer data",
                            value=token, visible=True)
743
+
744
def bar_plot_fn():
    """Build the model-performance bar plot from the saved score reports."""
    report_df = score_report_bar()
    plot_config = {
        "x": "Model Name",
        "y": "Average Rating",
        "x_title": "Model name",
        "y_title": "Average Rating",
        "title": "Model performance",
        "tooltip": ["Model Name", "Average Rating"],
        "y_lim": [1, 200],
        "width": 150,
        "visible": True,
    }
    return gr.BarPlot(report_df, **plot_config)
759
+ with gr.Tab("Human Evaluation"):
760
def answer_updated(model_ans):
    """Load the selected model-answer sheet and display its first entry.

    Reveals a header with the model name and question count plus the first
    id/question/answer triple, then hides the selector and submit button.
    """
    df_ques_ans=pd.read_excel(os.path.join("model_ans",str(model_ans)))
    num=0  # always start evaluation from the first row of the sheet
    print(df_ques_ans['id'][num],"**"*10)
    return [gr.Markdown(value=f"""# Model_name: {model_ans}
            # Number of questions: {len(df_ques_ans)}""",visible=True),
            gr.Label(value=str(df_ques_ans['id'][num])),
            gr.Label(value=str(df_ques_ans['question'][num])),
            gr.Label(value=str(df_ques_ans['answer'][num])),
            # Hide the model picker and its submit button once loaded.
            gr.Dropdown(visible=False),
            gr.Button(visible=False)
            ]
772
+
773
+ with gr.Row():
774
+ new_user=gr.Button("New User")
775
+ with gr.Row():
776
+ new_user_token=gr.Label(visible=False)
777
+ with gr.Row():
778
+ token_key=gr.Textbox(label="Enter your Token")
779
+ model_ans=gr.Dropdown(choices=os.listdir("model_ans"),label="Select the Model Answer for Human Evaluation")
780
+ btn_1=gr.Button("Submit")
781
+ gr.Markdown(""" # Instructions:
782
+ In this section, humans evaluate the answers of the model given specific questions. Each answer is rated between 1 and 5 by anonymous students.
783
+ Those values are saved in the `scrore_report` folder.
784
+ """)
785
+ lab_temp=gr.Markdown(visible=False)
786
+
787
+ with gr.Row():
788
+ id=gr.Label(value="",label="ID")
789
+ with gr.Row():
790
+ ques=gr.Label(value="",label="Question")
791
+ with gr.Row():
792
+ ans=gr.Label(value="",label="Answer")
793
+ with gr.Row():
794
+ score = gr.Radio(choices=[1,2,3,4,5],label="Rating")
795
+ with gr.Row():
796
+ human_ans_btn=gr.Button("Show Answer From Other Evaluators")
797
+ with gr.Row():
798
+ human_ans_lab=gr.Label(label="Human Answer",visible=False)
799
+ with gr.Row():
800
+ btn = gr.Button("Save")
801
+ question = gr.Button("Skip")
802
+ # with gr.Row():
803
+ # save_all_btn=gr.Button("Save all the data in dataframe")
804
+ # with gr.Row():
805
+ # move=gr.Number(label="Move to the question")
806
+ # move_btn=gr.Button("move")
807
+ with gr.Row():
808
+ btn_plot=gr.Button("Plot Generation")
809
+ with gr.Row():
810
+ plot = gr.BarPlot(visible=False)
811
+ btn_plot.click(bar_plot_fn, None, outputs=plot)
812
+ btn_1.click(answer_updated,model_ans,[lab_temp,id,ques,ans,model_ans,btn_1])
813
+ btn.click(score_save, inputs=[ques,ans,score,model_ans,token_key], outputs=[id,ques,ans])
814
+ question.click(new_ques,model_ans,[id,ques,ans])
815
+ # save_all_btn.click(save_all,model_ans,None)
816
+ # move_btn.click(move_to,[move,model_ans],[id,ques,ans])
817
def human_ans_func(id, ques):
    """Collect every evaluator's answer for this question and reveal them."""
    contributions = all_contri_ans(id, ques)
    refreshed_button = gr.Button("Show Answer from Other Evaluators")
    answers_label = gr.Label(value="\n".join(contributions), visible=True)
    return [refreshed_button, answers_label]
820
+ human_ans_btn.click(human_ans_func,[id, ques],[human_ans_btn,human_ans_lab])
821
+ new_user.click(generate_token,None,new_user_token)
822
+
823
+ #***************************************************
824
+ infer_ragchain=None
825
+ with gr.Tab("Inference"):
826
def echo(message, history, model_name_local, model_name_online,
         inf_checkbox, embedding_name, splitter_type_dropdown, chunk_size_slider,
         chunk_overlap_slider, separator_textbox, max_tokens_slider):
    """Chat handler: lazily build the global RAG chain on first use,
    then answer the incoming message with it."""
    global infer_ragchain
    chain_missing = infer_ragchain is None
    if chain_missing:
        gr.Info("Please wait!!! model is loading!!")
        if inf_checkbox:
            gr.Info("Model is loading from Huggingface!!")
        infer_ragchain = model_chain(model_name_local, model_name_online,
                                     inf_checkbox, embedding_name,
                                     splitter_type_dropdown, chunk_size_slider,
                                     chunk_overlap_slider, separator_textbox,
                                     max_tokens_slider)
    return infer_ragchain.ans_ret(message)
839
+ from utils import load_params_from_file
840
+ saved_params = load_params_from_file()
841
+ # If saved parameters exist, use them; otherwise, set default values
842
+ default_model_name = saved_params['model_name'] if saved_params else "Llama"
843
+ default_embedding_name = saved_params['embedding_name'] if saved_params else "BAAI/bge-base-en-v1.5"
844
+ default_splitter_type = saved_params['splitter_type_dropdown'] if saved_params else "character"
845
+ default_chunk_size = saved_params['chunk_size_slider'] if saved_params else 500
846
+ default_chunk_overlap = saved_params['chunk_overlap_slider'] if saved_params else 30
847
+ default_separator = saved_params['separator_textbox'] if saved_params else "\n"
848
+ default_max_tokens = saved_params['max_tokens_slider'] if saved_params else 1000
849
+ # with gr.Row():
850
+ with gr.Row():
851
def login_hug(token):
    """Authenticate this process with the Hugging Face Hub using *token*."""
    from huggingface_hub import login
    login(token=token)
854
+ login_hug(os.getenv('HF_TOKEN'))
855
+ embedding_name=gr.Dropdown(choices=["BAAI/bge-base-en-v1.5","dunzhang/stella_en_1.5B_v5","dunzhang/stella_en_400M_v5",
856
+ "nvidia/NV-Embed-v2","Alibaba-NLP/gte-Qwen2-1.5B-instruct"],value=default_embedding_name,
857
+ label="Select the Embedding Model")
858
+ splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
859
+ value=default_splitter_type, label="Splitter Type",interactive=True)
860
+
861
+ chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=default_chunk_size, step=50, label="Chunk Size")
862
+ chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=default_chunk_overlap, step=10, label="Chunk Overlap",interactive=True)
863
+ separator_textbox = gr.Textbox(value=default_separator, label="Separator (e.g., newline '\\n')",interactive=True)
864
+ max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=default_max_tokens, step=100, label="Max Tokens",interactive=True)
865
+
866
+ inf_checkbox=gr.Checkbox(label="Do you want to use without fine-tuned model from Hugging face?")
867
+ model_name_local=gr.Dropdown(visible=False)
868
+ model_name_online=gr.Dropdown(visible=False)
869
def model_online_local_show(inf_checkbox):
    """Swap visibility between the Hugging Face model dropdown (checkbox
    ticked) and the local fine-tuned model dropdown (unticked)."""
    if inf_checkbox:
        return [gr.Dropdown(visible=False),
                gr.Dropdown(choices=llama_models,
                            label="Select the LLM from Huggingface",visible=True)]
    else:
        return [gr.Dropdown(choices=os.listdir("models"),label="Select the fine-tuned LLM",visible=True),
                gr.Dropdown(visible=False)]
877
+
878
+ inf_checkbox.change(model_online_local_show,[inf_checkbox],[model_name_local,model_name_online])
879
+ gr.ChatInterface(fn=echo,
880
+ additional_inputs=[model_name_local,model_name_online,inf_checkbox,embedding_name,
881
+ splitter_type_dropdown,chunk_size_slider,
882
+ chunk_overlap_slider,separator_textbox,max_tokens_slider],
883
+ title="Chatbot")
884
+ #----------------------------------------------
885
+ with gr.Tab("Deployment"):
886
+ gr.Markdown("""`deploy` folder has all the code for the deployment of the model.
887
+ For installing dependencies use the following command: `pip install -r requirements.txt`.
888
+ """)
889
def deploy_func(model_name):
    """Copy the inference sources into the ``deploy`` folder and pin
    *model_name* in the saved parameter file for deployment.

    Args:
        model_name: Name of the fine-tuned model folder to deploy.
    """
    import shutil
    import os
    src_folder = 'src'
    deploy_folder = 'deploy'
    files_to_copy = ['model_ret.py', 'create_retriever.py', 'inference.py']
    os.makedirs(deploy_folder, exist_ok=True)
    for file_name in files_to_copy:
        src_file_path = os.path.join(src_folder, file_name)
        dest_file_path = os.path.join(deploy_folder, file_name)
        # BUG FIX: the paths were computed but the copy itself was missing,
        # so "Deploy" never actually populated the deploy folder.
        shutil.copy(src_file_path, dest_file_path)
    # Persist the chosen model once, not once per copied file.
    param_list = load_params_from_file()
    param_list["model_name"] = model_name
    save_params_to_file(param_list)
902
+
903
+
904
+ model_name=gr.Dropdown(choices=os.listdir("models"),label="Select the Model")
905
+ btn_model=gr.Button("Deploy")
906
+ btn_model.click(deploy_func,model_name)
907
+
908
+ demo.launch(share=True, debug=True)
909
+ save_ques_ans=[]
910
+ save_ques_ans_test=[]
911
+ cur_time=current_time()
912
+
913
def random_ques_ans(model_ans):
    """Return the next (question, answer, id, flag) tuple from the sheet.

    Advances the module-level cursor ``cnt``. The flag is 0 when the
    cursor wrapped back to the start of the sheet, 1 otherwise.
    """
    global cnt
    sheet = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    row = sheet.loc[cnt]
    entry_id = int(row['id'])
    question = row['question']
    answer = row['answer']
    cnt += 1
    wrapped = cnt >= len(sheet)
    if wrapped:
        cnt = 0
    return question, answer, entry_id, 0 if wrapped else 1
924
def save_all(model_ans):
    """Dump every collected rating to the per-model score report file.

    Args:
        model_ans: Name of the model-answer sheet being evaluated.
    """
    report = pd.DataFrame(data)
    # Portable path join (the hard-coded backslashes only worked on Windows).
    report.to_excel(os.path.join("score_report", f"{model_ans + cur_time}.xlsx"), index=False)
    gr.Info("Successfully saved all the answers!!!")
928
+
929
def score_save(ques, ans, score, model_ans, token_key):
    """Record one human rating, persist the score report, and advance the
    UI to the next question/answer pair.

    Args:
        ques, ans: The question/answer pair that was rated.
        score: The 1-5 rating selected by the evaluator.
        model_ans: Name of the model-answer sheet being evaluated.
        token_key: The evaluator's tracking token (currently unused here).

    Returns:
        Updated ID, Question and Answer labels for the next pair.
    """
    data.append({
        "question": ques,
        "answer": ans,
        "rating": score,
    })
    report = pd.DataFrame(data)
    # Portable path join (the hard-coded backslashes only worked on Windows).
    report.to_excel(os.path.join("score_report", f"{model_ans + cur_time}.xlsx"), index=False)
    gr.Info("Successfully saved in local folder!!!")
    ques_temp, ans_temp, id, flag = random_ques_ans(model_ans)
    gr.Info("Your opinion is submitted successfully!!!")
    return (gr.Label(value=id, label="ID"),
            gr.Label(value=ques_temp, label="Question"),
            gr.Label(value=ans_temp, label="Answer"))
942
+
943
def new_ques(model_ans):
    """Skip to the next question/answer pair without recording a rating."""
    question_text, answer_text, next_id, _flag = random_ques_ans(model_ans)
    return {
        id: gr.Label(value=next_id, label="ID"),
        ques: gr.Label(value=question_text, label="Question"),
        ans: gr.Label(value=answer_text, label="Answer"),
    }
950
+
951
def save_the_ques(ques, ans, file_type='xlsx'):
    """Append a question/answer pair to ``data/finetune_data.<ext>``.

    Args:
        ques (str): The question.
        ans (str): The answer.
        file_type (str, optional): "xlsx" or "csv". UI dropdown values such
            as "Save xlsx" / "Save csv" are accepted as well. Defaults to
            "xlsx".

    Returns:
        gr.Label: A visible success label.
    """
    # BUG FIX: the UI dropdown passes "Save xlsx"/"Save csv" here, which
    # previously produced a filename like "finetune_data.Save xlsx" and
    # caused both writer branches to be skipped, silently saving nothing.
    file_type = file_type.split()[-1].lower()

    new_row = pd.DataFrame({"question": [ques], "answer": [ans]})
    filepath = f"data/finetune_data.{file_type}"

    if Path(filepath).is_file():
        df_existing = pd.read_excel(filepath) if file_type == "xlsx" else pd.read_csv(filepath)
        df_combined = pd.concat([df_existing, new_row], ignore_index=True)
    else:
        df_combined = new_row

    if file_type == "xlsx":
        df_combined.to_excel(filepath, index=False)
    elif file_type == "csv":
        df_combined.to_csv(filepath, index=False)

    return gr.Label(value="Successfully saved in local folder.", visible=True)
982
+
983
def save_the_ques_test(ques, ans, file_type='xlsx'):
    """Append a question/ground-truth pair to ``data/testing_data.<ext>``.

    Args:
        ques (str): The question.
        ans (str): The ground-truth answer.
        file_type (str, optional): "xlsx" or "csv". UI dropdown values such
            as "Save xlsx" / "Save csv" are accepted as well. Defaults to
            "xlsx".

    Returns:
        gr.Label: A visible success label.
    """
    # BUG FIX: the UI dropdown passes "Save xlsx"/"Save csv" here, which
    # previously produced a filename like "testing_data.Save xlsx" and
    # caused both writer branches to be skipped, silently saving nothing.
    file_type = file_type.split()[-1].lower()

    new_row = pd.DataFrame({"question": [ques], "answer": [ans]})
    filepath = f"data/testing_data.{file_type}"

    if Path(filepath).is_file():
        df_existing = pd.read_excel(filepath) if file_type == "xlsx" else pd.read_csv(filepath)
        df_combined = pd.concat([df_existing, new_row], ignore_index=True)
    else:
        df_combined = new_row

    if file_type == "xlsx":
        df_combined.to_excel(filepath, index=False)
    elif file_type == "csv":
        df_combined.to_csv(filepath, index=False)

    return gr.Label(value="Successfully saved in local folder.", visible=True)
1014
+
1015
+ import pandas as pd
1016
+ from pathlib import Path
1017
+
1018
def save_emb_data(loss_function, first_input, second_input, third_input, file_type="xlsx"):
    """Save one embedding-training example to ``data/emb_data.<ext>``,
    with columns chosen by the selected loss function.

    Appends when the target file exists with matching columns; otherwise
    (over)writes a fresh file.

    Args:
        loss_function (str): One of the supported sentence-transformers
            loss names; determines the column layout.
        first_input, second_input, third_input: The three field values.
        file_type (str, optional): "xlsx" or "csv". UI dropdown values such
            as "Save xlsx" / "Save csv" are accepted as well. Defaults to
            "xlsx".

    Returns:
        str: A message describing what was saved.
    """
    # BUG FIX: the UI dropdown passes "Save xlsx"/"Save csv" here; normalize
    # so the filename extension and the writer branch are correct.
    file_type = file_type.split()[-1].lower()

    # Column layout per loss function (triplet losses share one schema).
    triplet_cols = ("anchor", "positive", "negative")
    schema = {
        "MultipleNegativesRankingLoss": triplet_cols,
        "OnlineContrastiveLoss": ("sentence1", "sentence2", "label"),
        "CoSENTLoss": ("sentence1", "sentence2", "score"),
        "GISTEmbedLoss": triplet_cols,
        "TripletLoss": triplet_cols,
    }
    if loss_function not in schema:
        # Previously an unknown loss crashed with NameError further down.
        return "Unknown loss function - nothing saved."

    values = (first_input, second_input, third_input)
    data = pd.DataFrame({col: [val] for col, val in zip(schema[loss_function], values)})

    filepath = f"data/emb_data.{file_type}"

    try:
        if file_type == "xlsx":
            existing_data = pd.read_excel(filepath)
        else:
            existing_data = pd.read_csv(filepath)
    except FileNotFoundError:
        if file_type == "xlsx":
            data.to_excel(filepath, index=False)
        else:
            data.to_csv(filepath, index=False)
        return "Data saved to a new file!"

    if list(data.columns) == list(existing_data.columns):
        combined_data = pd.concat([existing_data, data], ignore_index=True)
        if file_type == "xlsx":
            combined_data.to_excel(filepath, index=False)
        else:
            combined_data.to_csv(filepath, index=False)
        return "Data appended to existing file!"

    # Column mismatch: start over with the new schema, as before.
    if file_type == "xlsx":
        data.to_excel(filepath, index=False)
    else:
        data.to_csv(filepath, index=False)
    return "Data saved to a new file (overwritten)!"
1095
+
1096
def parse_data_func(link_temp,progress=gr.Progress()):
    """Scrape the given link for RAG source material, reporting progress.

    Args:
        link_temp: URL of the website to parse.
        progress: Gradio progress tracker (injected by Gradio).
    """
    progress(0, desc="Starting...")
    parse_data(link_temp,progress)
    gr.Info("Finished parsing!! Save as a docx file.")
1100
+
1101
def next_ques(ques=None, ans=None):
    """Fetch a fresh question for the 'Existing Questions' tab.

    Args:
        ques, ans: Unused; defaults exist because the Gradio click handler
            is registered with ``inputs=None`` (see ``question.click``),
            i.e. it invokes this function with zero arguments. Previously
            the required parameters made every click raise a TypeError.

    Returns:
        gr.Label showing the newly drawn question.
    """
    ques_temp, ans_temp = random_ques_ans2()
    return gr.Label(value=ques_temp)
1104
+
1105
+ with gr.Blocks(title="LLM QA Chatbot Builder",theme=gr.themes.Soft()) as demo:
1106
+ gr.Markdown("""
1107
+ # LLM QA Chatbot Builder
1108
+ """)
1109
+ with gr.Tab("Data Collection"):
1110
+ gr.Markdown(""" # Instructions:
1111
+ In this page you can prepare data for LLM fine-tuning, testing and embedding model finetuning your model. The data can be provided through Excel file or CSV file or directly via web interface. Additionally, data can be parsed from the target website (Data parsing for RAG) to further enhance the model performance.
1112
+
1113
+ ## 1. If you want to provide data in Excel file or CSV file for model fine-tuning and testing.
1114
+ - Create an Excel or CSV file in the data folder and name it `finetune_data.xlsx` or `finetune_data.csv` for finetuning the model.
1115
+ - Create an Excel or CSV file in the data folder and name it `testing_data.xlsx` or `testing_data.csv` for generating answers using the fine-tuned model.
1116
+ - `finetune_data.xlsx` or `finetune_data.csv` has two columns: `question` and `answer`. `testing_data.xlsx` or `testing_data.csv` has three columns: `question`, `ground_truth` ,`context`.
1117
+ """)
1118
+ gr.Markdown("""
1119
+ ## `finetune_data.xlsx` | `finetune_data.csv`
1120
+ """)
1121
+ gr.HTML(value=display_table(), label="finetune_data.xlsx or finetune_data.csv")
1122
+ gr.Markdown("""
1123
+ ## `testing_data.xlsx` | `testing_data.csv`
1124
+ """)
1125
+ gr.HTML(value=display_table("data/demo_test_data.xlsx"), label="testing_data.xlsx or testing_data.csv")
1126
+ gr.Markdown("""
1127
+ ## 2. You can use the below interface to create the dataset for training and testing models.
1128
+ """)
1129
+
1130
+ #Training data generation
1131
+ with gr.Tab("Training Data Generation"):
1132
+ with gr.Tab("Existing Questions"):
1133
+ gr.Markdown("""
1134
+ Existing questions are provided by the administrator and placed in the data folder named `existing_dataset.xlsx`. This file has only one column: `question`.
1135
+ After clicking the `Save the Answer` button. Those questions and answers are saved in the `data` folder as a `finetune_data.xlsx` file.
1136
+ """)
1137
+ ques_temp,ans_temp=random_ques_ans2()
1138
+ with gr.Row():
1139
+ ques=gr.Label(value=ques_temp,label="Question")
1140
+ with gr.Row():
1141
+ ans=gr.TextArea(label="Answer")
1142
+ with gr.Row():
1143
+ with gr.Row():
1144
+ type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
1145
+ save_training = gr.Button(value="Save")
1146
+ question = gr.Button("Generate New Question")
1147
+ with gr.Row():
1148
+ lab=gr.Label(visible=False)
1149
+ question.click(next_ques,None,ques)
1150
+ save_training.click(save_the_ques,[ques,ans,type_options],lab)
1151
+
1152
+ with gr.Tab("Custom Questions"):
1153
+ gr.Markdown("""
1154
+ After clicking the `save the answer` button. Those questions and answers are saved in the `data` folder as a `finetune_data.xlsx` file.
1155
+ """)
1156
+ with gr.Row():
1157
+ ques=gr.Textbox(label="Question")
1158
+ with gr.Row():
1159
+ ans=gr.TextArea(label="Answer")
1160
+ with gr.Row():
1161
+ with gr.Row():
1162
+ type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
1163
+ save_training = gr.Button(value="Save")
1164
+ with gr.Row():
1165
+ lab=gr.Label(visible=False,value="You answer is submitted!!! Thank you for your contribution.",label="Submitted")
1166
+ save_training.click(save_the_ques,[ques,ans,type_options],lab)
1167
+
1168
+ ### Testing data generation
1169
+ with gr.Tab("Testing Data Generation"):
1170
+ gr.Markdown("""
1171
+ You can create test data for generating answers using the fine-tune model, which will be used for testing the model's performance.
1172
+ After clicking the `Save the Answer` button. Those questions and answers are saved in the `data` folder as a `testing_data.xlsx` file.
1173
+ """)
1174
+ with gr.Row():
1175
+ ques=gr.Textbox(label="Question")
1176
+ with gr.Row():
1177
+ ans=gr.TextArea(label="Ground Truth")
1178
+ with gr.Row():
1179
+ ans=gr.TextArea(label="Contexts")
1180
+ with gr.Row():
1181
+ with gr.Row():
1182
+ type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
1183
+ save_test = gr.Button(value="Save")
1184
+ with gr.Row():
1185
+ lab=gr.Label(visible=False,value="You answer is submitted!!! Thank you for your contribution.",label="Submitted")
1186
+ save_test.click(save_the_ques_test,[ques,ans,type_options],None)
1187
+
1188
+ ## Embedding data generation
1189
def update_fields(loss_function):
    """Reconfigure the three embedding-data input boxes and the help text
    to match the column schema expected by the selected loss function.

    Returns:
        Tuple of (first_input, second_input, third_input, markdown)
        component updates; all hidden when no known loss is selected.
    """
    if loss_function == "MultipleNegativesRankingLoss":
        # Triplet schema: anchor / positive / negative.
        first_input = gr.Textbox(label="Anchor", visible=True, placeholder="The sentence to be embedded.")
        second_input = gr.Textbox(label="Positive", visible=True, placeholder="A sentence semantically similar to the anchor.")
        third_input = gr.Textbox(label="Negative", visible=True, placeholder="A sentence semantically dissimilar to the anchor.")
        markdown = gr.Markdown(
            """
            **MultipleNegativesRankingLoss:**
            Expects data with columns: `anchor`, `positive`, `negative`.
            - `anchor`: The sentence to be embedded.
            - `positive`: A sentence semantically similar to the anchor.
            - `negative`: A sentence semantically dissimilar to the anchor.""",
            visible=True
        )
    elif loss_function == "OnlineContrastiveLoss":
        # Pair schema with a binary similarity label.
        first_input = gr.Textbox(label="Sentence 1", visible=True, placeholder="The first sentence.")
        second_input = gr.Textbox(label="Sentence 2", visible=True, placeholder="The second sentence.")
        third_input = gr.Textbox(label="Label", visible=True, placeholder="1 if the sentences are similar, 0 if dissimilar.")
        markdown = gr.Markdown(
            """
            **OnlineContrastiveLoss:**
            Expects data with columns: `sentence1`, `sentence2`, `label`.
            - `sentence1`, `sentence2`: Pairs of sentences.
            - `label`: 1 if the sentences are similar, 0 if dissimilar.""",
            visible=True
        )
    elif loss_function == "CoSENTLoss":
        # Pair schema with a continuous similarity score.
        first_input = gr.Textbox(label="Sentence 1", visible=True, placeholder="The first sentence.")
        second_input = gr.Textbox(label="Sentence 2", visible=True, placeholder="The second sentence.")
        third_input = gr.Textbox(label="Score", visible=True, placeholder="A float value (e.g., 0-1) representing their similarity.")
        markdown = gr.Markdown(
            """
            **CoSENTLoss:**
            Expects data with columns: `sentence1`, `sentence2`, `score`.
            - `sentence1`, `sentence2`: Pairs of sentences.
            - `score`: A float value (e.g., 0-1) representing their similarity.""",
            visible=True
        )
    elif loss_function == "GISTEmbedLoss":
        # Triplet schema where the negative column may be left empty.
        first_input = gr.Textbox(label="Anchor", visible=True, placeholder="The sentence to be embedded.")
        second_input = gr.Textbox(label="Positive", visible=True, placeholder="A sentence semantically similar to the anchor.")
        third_input = gr.Textbox(label="Negative", visible=True, placeholder="A sentence semantically dissimilar to the anchor. Can be empty.")
        markdown = gr.Markdown(
            """
            **GISTEmbedLoss:**
            Expects data with either:
            - Columns: `anchor`, `positive`, `negative` (like TripletLoss).
            - Columns: `anchor`, `positive` (for pairs of similar sentences).""",
            visible=True
        )
    elif loss_function == "TripletLoss":
        # Triplet schema: anchor / positive / negative.
        first_input = gr.Textbox(label="Anchor", visible=True, placeholder="The sentence to be embedded.")
        second_input = gr.Textbox(label="Positive", visible=True, placeholder="A sentence semantically similar to the anchor.")
        third_input = gr.Textbox(label="Negative", visible=True, placeholder="A sentence semantically dissimilar to the anchor.")
        markdown = gr.Markdown(
            """
            **TripletLoss:**
            Expects data with columns: `anchor`, `positive`, `negative`.
            - `anchor`: The sentence to be embedded.
            - `positive`: A sentence semantically similar to the anchor.
            - `negative`: A sentence semantically dissimilar to the anchor.""",
            visible=True
        )
    else:
        # No (or unknown) selection: hide all fields and the help text.
        first_input = gr.Textbox(visible=False)
        second_input = gr.Textbox(visible=False)
        third_input = gr.Textbox(visible=False)
        markdown = gr.Markdown(visible=False)

    return first_input, second_input, third_input, markdown
1259
+
1260
+ with gr.Tab("Embedding Data Generation"):
1261
+ gr.Markdown("**Choose a loss function to format your embedding data.**")
1262
+ with gr.Row():
1263
+ loss_function = gr.Dropdown(
1264
+ choices=[
1265
+ "MultipleNegativesRankingLoss",
1266
+ "OnlineContrastiveLoss",
1267
+ "CoSENTLoss",
1268
+ "GISTEmbedLoss",
1269
+ "TripletLoss",
1270
+ ],
1271
+ label="Select the loss function",
1272
+ )
1273
+ with gr.Row():
1274
+ gr.Markdown("""Format `data/emb_data.xlsx` or `data/emb_data.csv` to the expected data format, according to the selected loss function.
1275
+ If the file exists and has matching columns, new data will be appended.
1276
+ Otherwise, the file will be overwritten.""")
1277
+ with gr.Row():
1278
+ loss_info_markdown = gr.Markdown(visible=False)
1279
+ with gr.Row():
1280
+ first_input = gr.Textbox(label="Anchor", value="",visible=False)
1281
+ second_input = gr.Textbox(label="Positive", value="",visible=False)
1282
+ third_input = gr.Textbox(label="Negative", value="",visible=False)
1283
+ loss_function.change(update_fields, loss_function, [first_input, second_input, third_input,loss_info_markdown])
1284
+ with gr.Row():
1285
+ with gr.Row():
1286
+ type_options = gr.Dropdown(choices=["Save xlsx", "Save csv"], value="Save xlsx", label="Preferred file type")
1287
+ save_emb = gr.Button(value="Save")
1288
+ save_emb.click(save_emb_data,[loss_function,first_input,second_input,third_input,type_options])
1289
+
1290
+ with gr.Row():
1291
+ gr.Markdown("""
1292
+ ## 3. Data parsing for RAG
1293
+ """)
1294
+ with gr.Row():
1295
+ link_temp=gr.Textbox(label="Enter Link to Parse Data for RAG",info="To provide the link for parsing the data from the website, this link can help create RAG data for the model.")
1296
+ parse_data_btn=gr.Button("Parse Data")
1297
+ from utils import parse_data
1298
+ parse_data_btn.click(parse_data_func,link_temp,link_temp)
1299
+
1300
+ #***************************************************
1301
+ with gr.Tab("Fine-tuning"):
1302
+ with gr.Tab("Fine-tune LLM"):
1303
+ with gr.Row():
1304
def login_hug(token):
    """Authenticate this process with the Hugging Face Hub using *token*."""
    from huggingface_hub import login
    login(token=token)
1307
+ login_hug(os.getenv('HF_TOKEN'))
1308
+ gr.Markdown("""
1309
+ # Instructions:
1310
+ - Required VRAM for training: 24GB, for inference: 16GB.(Mistral, Zepyhr and Lllama)\n
1311
+ - Required VRAM for training: 5GB, for inference: 4GB.(Phi,Flan-T5)
1312
+ - For fine-tuning a custom model select `custom model` option in `Select the model for fine-tuning` dropdown section. The custom model can be configured by editing the code section.\n
1313
+ - After fine-tuning the model, it will be saved in the `models` folder.
1314
+ """)
1315
+
1316
def edit_model_parameter(model_name_temp,edit_code,code_temp,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout, progress=gr.Progress()):
    """Persist any user-edited fine-tuning code, then run the fine-tune for the selected model.

    Args:
        model_name_temp: Model choice ("Mistral", "Zephyr", "Llama", "Phi", "Flant5", "Custom model").
        edit_code: Truthy when the user opened the advanced code editor.
        code_temp: Edited source text from the code editor ("" when untouched).
        lr, epoch, batch_size, gradient_accumulation, quantization,
        lora_r, lora_alpha, lora_dropout: Training hyperparameters forwarded to the trainer.
        progress: Gradio progress reporter (injected by Gradio).
    """
    progress(0, desc="Fine-tune started!! please wait ...")
    # One script per model; "Flant5" intentionally has no entry here (matching the
    # original behavior, which never wrote an edited Flan-T5 script from this path).
    script_paths = {
        "Mistral": "fine_tune_file/mistral_finetune.py",
        "Zephyr": "fine_tune_file/zepyhr_finetune.py",
        "Llama": "fine_tune_file/llama_finetune.py",
        "Phi": "fine_tune_file/phi_finetune.py",
        "Custom model": "fine_tune_file/finetune_file.py",
    }
    # Write the edited code back first so the imports below pick up the latest version.
    if edit_code and len(code_temp) != 0 and model_name_temp in script_paths:
        # Context manager closes the handle (the original `open(...).write(...)` leaked it).
        with open(script_paths[model_name_temp], "w") as fh:
            fh.write(code_temp)
    # Imported just before fine-tuning, to ensure the freshly written code is used.
    from fine_tune_file.finetune_file import custom_model_trainer
    from fine_tune_file.modular_finetune import get_trainer

    if model_name_temp == "Custom model":
        gr.Info("Fine-tune started!!!")
        trainer = custom_model_trainer()
        trainer.custom_model_finetune()
        gr.Info("Fine-tune Ended!!!")
    else:
        trainer = get_trainer(model_name_temp)
        gr.Info("Fine-tune started!!!")
        # Each trainer exposes a model-specific method with the same hyperparameter signature.
        finetune_methods = {
            "Mistral": "mistral_finetune",
            "Zephyr": "zepyhr_finetune",
            "Llama": "llama_finetune",
            "Phi": "phi_finetune",
            "Flant5": "flant5_finetune",
        }
        if model_name_temp in finetune_methods:
            getattr(trainer, finetune_methods[model_name_temp])(
                lr, epoch, batch_size, gradient_accumulation, quantization,
                lora_r, lora_alpha, lora_dropout)
        gr.Info("Fine-tune Ended!!!")
1359
+
1360
def code_show(model_name):
    """Return a visible, editable Code component loaded with the selected model's fine-tune script.

    Returns None (no UI change) for names without a script here — e.g. "Custom model",
    which is handled by custom_model() instead — matching the original fall-through.
    """
    scripts = {
        "Mistral": "fine_tune_file/mistral_finetune.py",
        "Zephyr": "fine_tune_file/zepyhr_finetune.py",
        "Llama": "fine_tune_file/llama_finetune.py",
        "Phi": "fine_tune_file/phi_finetune.py",
        "Flant5": "fine_tune_file/flant5_finetune.py",
    }
    path = scripts.get(model_name)
    if path is None:
        return None
    # Context manager closes the handle (the original bare open(...).read() leaked it).
    with open(path) as fh:
        source = fh.read()
    return gr.Code(visible=True, value=source, interactive=True, language="python")
1376
+
1377
def custom_model(model_name):
    """Show the editable custom-model code in the UI when 'Custom model' is selected.

    Returns a [Code, Button] update pair: the code editor is revealed and the
    "Advance Code Editing" button hidden for the custom model; otherwise the
    editor is hidden and the button restored.
    """
    if model_name == "Custom model":
        # Context manager closes the handle (the original bare open(...).read() leaked it).
        with open("fine_tune_file/finetune_file.py") as fh:
            source = fh.read()
        return [gr.Code(visible=True, value=source, interactive=True, language="python"),
                gr.Button(visible=False)]
    return [gr.Code(visible=False), gr.Button("Advance Code Editing", visible=True)]
1383
def change_code_fun(code_, model_name):
    """Save the edited fine-tuning source back to the selected model's script file.

    Silently does nothing for unknown model names (matching the original fall-through).
    """
    scripts = {
        "Mistral": "fine_tune_file/mistral_finetune.py",
        "Zephyr": "fine_tune_file/zepyhr_finetune.py",
        "Llama": "fine_tune_file/llama_finetune.py",
        "Phi": "fine_tune_file/phi_finetune.py",
        "Flant5": "fine_tune_file/flant5_finetune.py",
    }
    path = scripts.get(model_name)
    if path is not None:
        # Context manager closes the handle (the original `open(...).write(...)` leaked it).
        with open(path, "w") as fh:
            fh.write(code_)
        gr.Info("Successfully saved code!!!")
1399
+
1400
def finetune_emb(model_name, loss_name, epochs = 1, batch_size = 8):
    """Fine-tune the selected sentence-embedding model with the chosen loss function."""
    gr.Info("Embedding model fine-tune is started!!!")
    # Imported lazily so heavy training dependencies load only when needed.
    from embedding_tuner import EmbeddingFinetuner
    tuner = EmbeddingFinetuner(
        model_name=model_name,
        loss_function=loss_name,
        epochs=epochs,
        batch_size=batch_size,
    )
    # Only announce completion when the trainer reports success.
    if tuner.train():
        gr.Info("Embedding model fine-tune finished!!!")
1412
+
1413
+ with gr.Row():
1414
+ code_temp=gr.Code(visible=False)
1415
+ with gr.Row():
1416
+ model_name=gr.Dropdown(choices=["Mistral","Zephyr","Llama","Phi","Flant5","Custom model"],label="Select the LLM for fine-tuning")
1417
+ with gr.Accordion("Parameter Setup"):
1418
+ with gr.Row():
1419
+ lr=gr.Number(label="learning_rate",value=5e-6,interactive=True,info="The step size at which the model parameters are updated during training. It controls the magnitude of the updates to the model's weights.")
1420
+ epoch=gr.Number(label="epochs",value=2,interactive=True,info="One complete pass through the entire training dataset during the training process. It's a measure of how many times the algorithm has seen the entire dataset.")
1421
+ batch_size=gr.Number(label="batch_size",value=4,interactive=True,info="The number of training examples used in one iteration of training. It affects the speed and stability of the training process.")
1422
+ gradient_accumulation = gr.Number(info="Gradient accumulation involves updating model weights after accumulating gradients over multiple batches, instead of after each individual batch.",label="gradient_accumulation",value=4,interactive=True)
1423
+ with gr.Row():
1424
+ quantization = gr.Dropdown(info="Quantization is a technique used to reduce the precision of numerical values, typically from 32-bit floating-point numbers to lower bit representations.",label="quantization",choices=[4,8],value=8,interactive=True)
1425
+ lora_r = gr.Number(info="LoRA_r is a hyperparameter associated with the rank of the low-rank approximation used in LoRA.",label="lora_r",value=16,interactive=True)
1426
+ lora_alpha = gr.Number(info="LoRA_alpha is a hyperparameter used in LoRA for controlling the strength of the adaptation.",label="lora_alpha",value=32,interactive=True)
1427
+ lora_dropout = gr.Number(info="LoRA_dropout is a hyperparameter used in LoRA to control the dropout rate during fine-tuning.",label="lora_dropout",value=.05,interactive=True)
1428
+ with gr.Row():
1429
+ edit_code=gr.Button("Advance Code Editing")
1430
+ with gr.Row():
1431
+ code_temp=gr.Code(visible=False)
1432
+ with gr.Row():
1433
+ parameter_alter=gr.Button("Fine-tune")
1434
+ with gr.Row():
1435
+ fin_com=gr.Label(visible=False)
1436
+ edit_code.click(code_show,model_name,code_temp)
1437
+ # On click finetune button
1438
+ parameter_alter.click(edit_model_parameter,[model_name,edit_code,code_temp,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout],model_name)
1439
+ model_name.change(custom_model,model_name,[code_temp,edit_code])
1440
+ with gr.Tab("Embedding model"):
1441
+ with gr.Row():
1442
+ embedding_model = gr.Dropdown(
1443
+ choices=[
1444
+ "BAAI/bge-base-en-v1.5",
1445
+ "dunzhang/stella_en_1.5B_v5",
1446
+ "dunzhang/stella_en_400M_v5",
1447
+ "nvidia/NV-Embed-v2",
1448
+ "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
1449
+ ],
1450
+ label="Select the embedding model for fine-tuning",
1451
+ )
1452
+ loss_function = gr.Dropdown(
1453
+ choices=[
1454
+ "MultipleNegativesRankingLoss",
1455
+ "OnlineContrastiveLoss",
1456
+ "CoSENTLoss",
1457
+ "GISTEmbedLoss",
1458
+ "TripletLoss",
1459
+ ],
1460
+ label="Select the loss function",
1461
+ )
1462
+
1463
+ epoch=gr.Number(label="epochs",value=1,interactive=True,info="One complete pass through the entire training dataset during the training process.")
1464
+ batch_size=gr.Number(label="batch_size",value=8,interactive=True,info="The number of training examples used in one iteration of training.")
1465
+ with gr.Row():
1466
+ btn_emb = gr.Button("Fine-tune the embedding model")
1467
+
1468
+
1469
+ # with gr.Row():
1470
+ # with gr.Accordion(label="Expected data format according to loss function"):
1471
+ # loss_info = gr.Markdown(
1472
+ # """
1473
+ # # Expected data format according to loss function:
1474
+ # ### Format `data/emb_data.xlsx` | `data/emb_data.xlsx` accordingly.
1475
+
1476
+ # **MultipleNegativesRankingLoss:**
1477
+ # Expects data with columns: `anchor`, `positive`, `negative`.
1478
+ # - `anchor`: The sentence to be embedded.
1479
+ # - `positive`: A sentence semantically similar to the anchor.
1480
+ # - `negative`: A sentence semantically dissimilar to the anchor.
1481
+
1482
+ # **OnlineContrastiveLoss:**
1483
+ # Expects data with columns: `sentence1`, `sentence2`, `label`.
1484
+ # - `sentence1`, `sentence2`: Pairs of sentences.
1485
+ # - `label`: 1 if the sentences are similar, 0 if dissimilar.
1486
+
1487
+ # **CoSENTLoss:**
1488
+ # Expects data with columns: `sentence1`, `sentence2`, `score`.
1489
+ # - `sentence1`, `sentence2`: Pairs of sentences.
1490
+ # - `score`: A float value (e.g., 0-1) representing their similarity.
1491
+
1492
+ # **GISTEmbedLoss:**
1493
+ # Expects data with either:
1494
+ # - Columns: `anchor`, `positive`, `negative` (like TripletLoss).
1495
+ # - Columns: `anchor`, `positive` (for pairs of similar sentences).
1496
+
1497
+ # **TripletLoss:**
1498
+ # Expects data with columns: `anchor`, `positive`, `negative`.
1499
+ # - `anchor`: The sentence to be embedded.
1500
+ # - `positive`: A sentence semantically similar to the anchor.
1501
+ # - `negative`: A sentence semantically dissimilar to the anchor.
1502
+ # """
1503
+ # )
1504
+
1505
+
1506
+
1507
+ btn_emb.click(finetune_emb,[embedding_model, loss_function, epoch, batch_size], None)
1508
+ #***************************************************
1509
+ with gr.Tab("Testing Data Generation and RAG Customization"):
1510
+ from utils import save_params_to_file
1511
def ans_gen_fun(model_name,embedding_name,
                splitter_type_dropdown,chunk_size_slider,
                chunk_overlap_slider,separator_textbox,max_tokens_slider,save_as_fav,progress=gr.Progress()):
    """Answer every question in data/testing_dataset.xlsx with the selected RAG setup,
    score the answers with RAG metrics, save them to model_ans/, and return a BarPlot.

    Returns None (after a warning) when the testing dataset has not been created yet.
    """
    if not os.path.exists(os.path.join("data","testing_dataset.xlsx")):
        gr.Warning("You need to create testing dataset first from Data collection.")
        return
    if save_as_fav:
        save_params_to_file(model_name,embedding_name,
                            splitter_type_dropdown,chunk_size_slider,
                            chunk_overlap_slider,separator_textbox,max_tokens_slider)
    if not os.path.exists(model_name):
        # NOTE(review): gr.Error is instantiated but never raised here, so no error
        # is shown and execution continues — confirm whether this should be
        # `raise gr.Error(...)` and abort.
        gr.Error("Model not found in local folder!!")
    from model_ret import calculate_rag_metrics
    progress(0, desc="Starting...")
    df_temp = pd.read_excel(r"data/testing_dataset.xlsx")
    infer_model = model_chain(model_name,None,
                              True,embedding_name,splitter_type_dropdown,chunk_size_slider,
                              chunk_overlap_slider,separator_textbox,max_tokens_slider)
    rag_chain = infer_model.rag_chain_ret()
    model_ques_ans_gen = []
    # Rows are assumed to be (question, ground_truth, context) — TODO confirm sheet layout.
    for idx, row in enumerate(progress.tqdm(df_temp.values), start=1):
        model_ques_ans_gen.append({
            "id": idx,
            "question": row[0],
            # Bug fix: ans_ret lives on the model_chain instance (see the chat `echo`
            # handler, which calls infer_ragchain.ans_ret(message, rag_chain)); the
            # original invoked it on the retriever chain object instead.
            "answer": infer_model.ans_ret(row, rag_chain),
            "contexts": row[2],
            "ground_truths": row[1],
        })
    temp = calculate_rag_metrics(model_ques_ans_gen, model_name)
    temp['Average Rating'] = temp.mean(axis=1)
    pd.DataFrame(temp).to_excel(os.path.join("model_ans", f"_{model_name+cur_time}.xlsx"), index=False)
    rag_metrics = ['answer_correctness', 'answer_similarity', 'answer_relevancy', 'faithfulness', 'context_recall', 'context_precision']
    new_df = pd.DataFrame({'Rag Metric': rag_metrics, 'Average Rating': temp.mean()})
    # Bug fix: the original placed this Info after `return`, making it unreachable.
    gr.Info("Generating answer from model is finished!!! Now, it is ready for human evaluation. Model answer is saved in \"model_ans\" folder. ")
    return gr.BarPlot(
        new_df,
        x="Rag Metric",
        y="Average Rating",
        x_title="Rag Metric",
        y_title="Average Rating",
        title="RAG performance",
        tooltip=["Rag Metric", "Average Rating"],
        y_lim=[1, 200],
        width=150,
        visible=True
    )
1562
+
1563
+ gr.Markdown(""" # Instructions:\n
1564
+ In this page you can generate answer from fine-tuned models for human evaluation. The questions must be created using `Testing data generation` section of `Data collection` tab.
1565
+ """)
1566
+ with gr.Row():
1567
+ embedding_name=gr.Dropdown(choices=["BAAI/bge-base-en-v1.5","dunzhang/stella_en_1.5B_v5","dunzhang/stella_en_400M_v5",
1568
+ "nvidia/NV-Embed-v2","Alibaba-NLP/gte-Qwen2-1.5B-instruct"],
1569
+ label="Select the Embedding Model")
1570
+ splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
1571
+ value="character", label="Splitter Type",interactive=True)
1572
+
1573
+ chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=500, step=50, label="Chunk Size")
1574
+ chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=30, step=10, label="Chunk Overlap",interactive=True)
1575
+ separator_textbox = gr.Textbox(value="\n", label="Separator (e.g., newline '\\n')",interactive=True)
1576
+ max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=1000, step=100, label="Max Tokens",interactive=True)
1577
+ with gr.Row():
1578
+ save_as_fav=gr.Checkbox(label="Save this settings as favorite")
1579
+ model_name=gr.Dropdown(choices=os.listdir("models"),label="Select the Model")
1580
+ with gr.Row():
1581
+ ans_gen=gr.Button("Generate Answer for Testing Dataset")
1582
+ with gr.Row():
1583
+ plot = gr.BarPlot(visible=False)
1584
+ ans_gen.click(ans_gen_fun,[model_name,embedding_name,
1585
+ splitter_type_dropdown,chunk_size_slider,
1586
+ chunk_overlap_slider,separator_textbox,max_tokens_slider,save_as_fav],plot)
1587
+ #***************************************************Human evaluation
1588
+ import secrets
1589
+
1590
def generate_token():
    """Mint a unique 12-hex-char evaluator token and create its empty answer sheet.

    Loops until a token not already present in save_ques_ans/ is drawn, writes an
    empty id/question/answer workbook for it, and returns a visible Label holding it.
    """
    while True:
        token = secrets.token_hex(6)
        # Existing tokens are the saved filenames minus their ".xlsx" suffix.
        existing = {name[:-5] for name in os.listdir("save_ques_ans")}
        if token not in existing:
            df = pd.DataFrame({'id': [], 'question': [], 'answer': []})
            # os.path.join instead of the original "save_ques_ans//" string concat.
            df.to_excel(os.path.join("save_ques_ans", f"{token}.xlsx"), index=False)
            return gr.Label(label="Please keep the token for tracking question answer data", value=token, visible=True)
1603
+
1604
def bar_plot_fn():
    """Build the model-performance bar chart from the aggregated score report."""
    report = score_report_bar()
    return gr.BarPlot(
        report,
        x="Model Name",
        y="Average Rating",
        x_title="Model name",
        y_title="Average Rating",
        title="Model performance",
        tooltip=["Model Name", "Average Rating"],
        y_lim=[1, 200],
        width=150,
        visible=True,
    )
1619
+ with gr.Tab("Human Evaluation"):
1620
def answer_updated(model_ans):
    """Load the chosen model-answer sheet and populate the first question for rating."""
    df_ques_ans = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    first = 0
    print(df_ques_ans['id'][first], "**"*10)
    header = f"""# Model_name: {model_ans}
    # Number of questions: {len(df_ques_ans)}"""
    # Hide the selector and submit button once a sheet is loaded.
    return [
        gr.Markdown(value=header, visible=True),
        gr.Label(value=str(df_ques_ans['id'][first])),
        gr.Label(value=str(df_ques_ans['question'][first])),
        gr.Label(value=str(df_ques_ans['answer'][first])),
        gr.Dropdown(visible=False),
        gr.Button(visible=False),
    ]
1632
+
1633
+ with gr.Row():
1634
+ new_user=gr.Button("New User")
1635
+ with gr.Row():
1636
+ new_user_token=gr.Label(visible=False)
1637
+ with gr.Row():
1638
+ token_key=gr.Textbox(label="Enter your Token")
1639
+ model_ans=gr.Dropdown(choices=os.listdir("model_ans"),label="Select the Model Answer for Human Evaluation")
1640
+ btn_1=gr.Button("Submit")
1641
+ gr.Markdown(""" # Instructions:
1642
+ In this section, humans evaluate the answers of the model given specific questions. Each answer is rated between 1 and 5 by anonymous students.
1643
+ Those values are saved in the `scrore_report` folder.
1644
+ """)
1645
+ lab_temp=gr.Markdown(visible=False)
1646
+
1647
+ with gr.Row():
1648
+ id=gr.Label(value="",label="ID")
1649
+ with gr.Row():
1650
+ ques=gr.Label(value="",label="Question")
1651
+ with gr.Row():
1652
+ ans=gr.Label(value="",label="Answer")
1653
+ with gr.Row():
1654
+ score = gr.Radio(choices=[1,2,3,4,5],label="Rating")
1655
+ with gr.Row():
1656
+ human_ans_btn=gr.Button("Show Answer From Other Evaluators")
1657
+ with gr.Row():
1658
+ human_ans_lab=gr.Label(label="Human Answer",visible=False)
1659
+ with gr.Row():
1660
+ btn = gr.Button("Save")
1661
+ question = gr.Button("Skip")
1662
+ # with gr.Row():
1663
+ # save_all_btn=gr.Button("Save all the data in dataframe")
1664
+ # with gr.Row():
1665
+ # move=gr.Number(label="Move to the question")
1666
+ # move_btn=gr.Button("move")
1667
+ with gr.Row():
1668
+ btn_plot=gr.Button("Plot Generation")
1669
+ with gr.Row():
1670
+ plot = gr.BarPlot(visible=False)
1671
+ btn_plot.click(bar_plot_fn, None, outputs=plot)
1672
+ btn_1.click(answer_updated,model_ans,[lab_temp,id,ques,ans,model_ans,btn_1])
1673
+ btn.click(score_save, inputs=[ques,ans,score,model_ans,token_key], outputs=[id,ques,ans])
1674
+ question.click(new_ques,model_ans,[id,ques,ans])
1675
+ # save_all_btn.click(save_all,model_ans,None)
1676
+ # move_btn.click(move_to,[move,model_ans],[id,ques,ans])
1677
def human_ans_func(id, ques):
    """Collect every evaluator's answer for this question and reveal them in the label."""
    answers = all_contri_ans(id, ques)
    return [gr.Button("Show Answer from Other Evaluators"),
            gr.Label(value="\n".join(answers), visible=True)]
1680
+ human_ans_btn.click(human_ans_func,[id, ques],[human_ans_btn,human_ans_lab])
1681
+ new_user.click(generate_token,None,new_user_token)
1682
+
1683
+ #***************************************************
1684
+ infer_ragchain=None
1685
+ with gr.Tab("Inference"):
1686
def echo(message, history,model_name_local,model_name_online,
         inf_checkbox,embedding_name,splitter_type_dropdown,chunk_size_slider,
         chunk_overlap_slider,separator_textbox,max_tokens_slider):
    """Chat handler: lazily build the RAG model chain on first use, then answer *message*."""
    global infer_ragchain
    # The model is expensive to load, so it is cached in a module-level global.
    if infer_ragchain is None:
        gr.Info("Please wait!!! model is loading!!")
        if inf_checkbox:
            gr.Info("Model is loading from Huggingface!!")
        infer_ragchain = model_chain(model_name_local, model_name_online,
                                     inf_checkbox, embedding_name, splitter_type_dropdown,
                                     chunk_size_slider, chunk_overlap_slider,
                                     separator_textbox, max_tokens_slider)
    rag_chain = infer_ragchain.rag_chain_ret()
    return infer_ragchain.ans_ret(message, rag_chain)
1699
+ from utils import load_params_from_file
1700
+ saved_params = load_params_from_file()
1701
+ # If saved parameters exist, use them; otherwise, set default values
1702
+ default_model_name = saved_params['model_name'] if saved_params else "Llama"
1703
+ default_embedding_name = saved_params['embedding_name'] if saved_params else "BAAI/bge-base-en-v1.5"
1704
+ default_splitter_type = saved_params['splitter_type_dropdown'] if saved_params else "character"
1705
+ default_chunk_size = saved_params['chunk_size_slider'] if saved_params else 500
1706
+ default_chunk_overlap = saved_params['chunk_overlap_slider'] if saved_params else 30
1707
+ default_separator = saved_params['separator_textbox'] if saved_params else "\n"
1708
+ default_max_tokens = saved_params['max_tokens_slider'] if saved_params else 1000
1709
+ with gr.Row():
1710
def login_hug(token):
    """Log this session in to the Hugging Face Hub with *token*."""
    # Lazy import keeps hub-client startup cost out of module import time.
    from huggingface_hub import login
    login(token=token)
1713
+ login_hug(os.getenv('HF_TOKEN'))
1714
+ with gr.Row():
1715
+ embedding_name=gr.Dropdown(choices=["BAAI/bge-base-en-v1.5","dunzhang/stella_en_1.5B_v5","dunzhang/stella_en_400M_v5",
1716
+ "nvidia/NV-Embed-v2","Alibaba-NLP/gte-Qwen2-1.5B-instruct"],value=default_embedding_name,
1717
+ label="Select the Embedding Model")
1718
+ splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
1719
+ value=default_splitter_type, label="Splitter Type",interactive=True)
1720
+
1721
+ chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=default_chunk_size, step=50, label="Chunk Size")
1722
+ chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=default_chunk_overlap, step=10, label="Chunk Overlap",interactive=True)
1723
+ separator_textbox = gr.Textbox(value=default_separator, label="Separator (e.g., newline '\\n')",interactive=True)
1724
+ max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=default_max_tokens, step=100, label="Max Tokens",interactive=True)
1725
+
1726
+ inf_checkbox=gr.Checkbox(label="Do you want to use without fine-tuned model from Hugging face?")
1727
+ model_name_local=gr.Dropdown(choices=os.listdir("models"),visible=True,label="Select the fine-tuned LLM",value=default_model_name)
1728
+ model_name_online=gr.Dropdown(visible=False)
1729
def model_online_local_show(inf_checkbox):
    """Toggle between the local fine-tuned model dropdown and the Hugging Face one."""
    if not inf_checkbox:
        # Local mode: list the fine-tuned models on disk, hide the online selector.
        return [gr.Dropdown(choices=os.listdir("models"), label="Select the fine-tuned LLM", visible=True),
                gr.Dropdown(visible=False)]
    # Online mode: hide the local selector, show the stock Hugging Face choices.
    return [gr.Dropdown(visible=False),
            gr.Dropdown(choices=["Zephyr", "Llama", "Mistral", "Phi", "Flant5"],
                        label="Select the LLM from Huggingface", visible=True)]
1737
+
1738
+ inf_checkbox.change(model_online_local_show,[inf_checkbox],[model_name_local,model_name_online])
1739
+ gr.ChatInterface(fn=echo,
1740
+ additional_inputs=[model_name_local,model_name_online,inf_checkbox,embedding_name,
1741
+ splitter_type_dropdown,chunk_size_slider,
1742
+ chunk_overlap_slider,separator_textbox,max_tokens_slider],
1743
+ title="Chatbot")
1744
+ #----------------------------------------------
1745
+ with gr.Tab("Deployment"):
1746
+ gr.Markdown("""`deploy` folder has all the code for the deployment of the model.
1747
+ For installing dependencies use the following command: `pip install -r requirements.txt`.
1748
+ """)
1749
def deploy_func(model_name):
    """Copy the inference sources into deploy/ and record the chosen model in the saved params.

    Args:
        model_name: Name of the model directory under models/ to deploy.
    """
    import shutil
    import os
    src_folder = 'src'
    deploy_folder = 'deploy'
    files_to_copy = ['model_ret.py', 'create_retriever.py', 'inference.py']
    for file_name in files_to_copy:
        src_file_path = os.path.join(src_folder, file_name)
        dest_file_path = os.path.join(deploy_folder, file_name)
        # Bug fix: the original computed both paths but never copied anything,
        # leaving `shutil` imported and unused.
        shutil.copy(src_file_path, dest_file_path)
    param_list = load_params_from_file()
    param_list["model_name"] = model_name
    # NOTE(review): save_params_to_file is called elsewhere with seven positional
    # arguments; confirm it also accepts a single params dict as used here.
    save_params_to_file(param_list)
1762
+
1763
+
1764
+ model_name=gr.Dropdown(choices=os.listdir("models"),label="Select the Model")
1765
+ btn_model=gr.Button("Deploy")
1766
+ btn_model.click(deploy_func,model_name)
1767
+
1768
+ demo.launch(share=False)
create_retriever.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ from langchain_community.document_loaders import Docx2txtLoader, TextLoader, PyPDFLoader
4
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.retrievers import EnsembleRetriever
8
+ # from ragatouille import RAGPretrainedModel
9
+
10
+ # Function to load and process documents
11
def docs_return(flag):
    """Load every .docx, .pdf and .txt file under rag_data/.

    Returns the flat list of loaded Documents when flag == 0; otherwise a single
    string joining each file's first document's page_content with blank lines.
    """
    base = 'rag_data/'

    all_doc, all_doc2 = [], []

    for path in glob.glob(base + '*.docx'):
        documents = Docx2txtLoader(path).load()
        all_doc.extend(documents)
        all_doc2.append(str(documents[0].page_content))

    for path in glob.glob(base + '*.pdf'):
        # Lazy-load pages one at a time, then materialize the list.
        documents = list(PyPDFLoader(path, extract_images=True).lazy_load())
        all_doc.extend(documents)
        all_doc2.append(str(documents[0].page_content))

    for path in glob.glob(base + '*.txt'):
        documents = TextLoader(path).load()
        all_doc.extend(documents)
        all_doc2.append(str(documents[0].page_content))

    return all_doc if flag == 0 else '\n\n'.join(all_doc2)
47
+
48
+ # Function to get or download the embedding model
49
def get_embedding_model(model_name):
    """Return HuggingFaceEmbeddings, preferring a locally cached copy under embedding_model/."""
    local_model_path = f"embedding_model/{model_name.replace('/', '_')}"
    if not os.path.exists(local_model_path):
        print(f"Downloading model {model_name}")
        return HuggingFaceEmbeddings(model_name=model_name)
    print(f"Loading local model from {local_model_path}")
    return HuggingFaceEmbeddings(model_name=local_model_path)
57
+
58
+ # Function to return different types of text splitters
59
def get_text_splitter(splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Build the requested LangChain text splitter.

    Raises:
        ValueError: for any splitter_type other than 'character', 'recursive', 'token'.
    """
    if splitter_type == 'character':
        return CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator)
    if splitter_type == 'recursive':
        return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    if splitter_type == 'token':
        # The token splitter budgets by max_tokens instead of the character chunk_size.
        return TokenTextSplitter(chunk_size=max_tokens, chunk_overlap=chunk_overlap)
    raise ValueError("Unsupported splitter type. Choose from 'character', 'recursive', or 'token'.")
68
+
69
+ # Retriever using Chroma and HuggingFace embeddings
70
def retriever_chroma(flag, model_name="BAAI/bge-large-en-v1.5", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Return an MMR-mode Chroma retriever over the rag_data/ documents.

    Args:
        flag: Falsy -> (re)build the index from rag_data/ and persist to ./chroma_db;
              truthy -> reopen the previously persisted "vectorstore" directory.
        model_name: Embedding model passed to get_embedding_model.
        splitter_type, chunk_size, chunk_overlap, separator, max_tokens:
              Forwarded to get_text_splitter when building the index.
    """
    embeddings = get_embedding_model(model_name)

    if not flag:
        all_doc = docs_return(0)
        text_splitter = get_text_splitter(splitter_type=splitter_type, chunk_size=chunk_size,
                                          chunk_overlap=chunk_overlap, separator=separator,
                                          max_tokens=max_tokens)
        docs = text_splitter.split_documents(documents=all_doc)
        vectordb = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")
    else:
        # Bug fix: Chroma has no `load_local` (that is the FAISS API); a persisted
        # Chroma store is reopened through the constructor's persist_directory.
        vectordb = Chroma(persist_directory="vectorstore", embedding_function=embeddings)

    return vectordb.as_retriever(
        search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}
    )
data/demo_table_data.xlsx ADDED
Binary file (11.1 kB). View file
 
data/demo_test_data.xlsx ADDED
Binary file (12.4 kB). View file
 
data/emb_data.xlsx ADDED
Binary file (4.97 kB). View file
 
data/existing_dataset.xlsx ADDED
Binary file (10.5 kB). View file
 
data/finetune_data.xlsx ADDED
Binary file (5 kB). View file
 
data/testing_dataset.xlsx ADDED
Binary file (27.5 kB). View file
 
data/validation-kuetllm_tanim - Copy.xlsx ADDED
Binary file (11.2 kB). View file
 
data_ret.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
from langchain_community.document_loaders import WebBaseLoader


def data_ret2(link):
    """Fetch the page at *link* and return it as LangChain Documents via WebBaseLoader."""
    # (removed unused local `start = 1` from the original)
    loader = WebBaseLoader(f"{link}")
    return loader.load()
deploy/create_retriever.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ from langchain_community.document_loaders import Docx2txtLoader, TextLoader
4
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.retrievers import EnsembleRetriever
8
+ from ragatouille import RAGPretrainedModel
9
+
10
+ # Function to load and process documents
11
def docs_return(flag):
    """Load every .docx and .txt file under rag_data/.

    Returns the flat list of loaded Documents when flag == 0; otherwise a single
    string joining each file's first document's page_content with blank lines.
    """
    base = 'rag_data/'

    all_doc, all_doc2 = [], []

    for path in glob.glob(base + '*.docx'):
        documents = Docx2txtLoader(path).load()
        all_doc.extend(documents)
        all_doc2.append(str(documents[0].page_content))

    for path in glob.glob(base + '*.txt'):
        documents = TextLoader(path).load()
        all_doc.extend(documents)
        all_doc2.append(str(documents[0].page_content))

    return all_doc if flag == 0 else '\n\n'.join(all_doc2)
36
+
37
+ # Function to get or download the embedding model
38
def get_embedding_model(model_name):
    """Return HuggingFaceEmbeddings, preferring a locally cached copy under embedding_model/."""
    local_model_path = f"embedding_model/{model_name.replace('/', '_')}"
    if not os.path.exists(local_model_path):
        print(f"Downloading model {model_name}")
        return HuggingFaceEmbeddings(model_name=model_name)
    print(f"Loading local model from {local_model_path}")
    return HuggingFaceEmbeddings(model_name=local_model_path)
46
+
47
+ # Function to return different types of text splitters
48
def get_text_splitter(splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Build the requested LangChain text splitter.

    Raises:
        ValueError: for any splitter_type other than 'character', 'recursive', 'token'.
    """
    if splitter_type == 'character':
        return CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator)
    if splitter_type == 'recursive':
        return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    if splitter_type == 'token':
        # The token splitter budgets by max_tokens instead of the character chunk_size.
        return TokenTextSplitter(chunk_size=max_tokens, chunk_overlap=chunk_overlap)
    raise ValueError("Unsupported splitter type. Choose from 'character', 'recursive', or 'token'.")
57
+
58
+ # Retriever using Chroma and HuggingFace embeddings
59
def retriever_chroma(flag, model_name="BAAI/bge-large-en-v1.5", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Return an MMR-mode Chroma retriever over the rag_data/ documents.

    Args:
        flag: Falsy -> (re)build the index from rag_data/ and persist to ./chroma_db;
              truthy -> reopen the previously persisted "vectorstore" directory.
        model_name: Embedding model passed to get_embedding_model.
        splitter_type, chunk_size, chunk_overlap, separator, max_tokens:
              Forwarded to get_text_splitter when building the index.
    """
    embeddings = get_embedding_model(model_name)

    if not flag:
        all_doc = docs_return(0)
        text_splitter = get_text_splitter(splitter_type=splitter_type, chunk_size=chunk_size,
                                          chunk_overlap=chunk_overlap, separator=separator,
                                          max_tokens=max_tokens)
        docs = text_splitter.split_documents(documents=all_doc)
        vectordb = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")
    else:
        # Bug fix: Chroma has no `load_local` (that is the FAISS API); a persisted
        # Chroma store is reopened through the constructor's persist_directory.
        vectordb = Chroma(persist_directory="vectorstore", embedding_function=embeddings)

    return vectordb.as_retriever(
        search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}
    )
88
+
89
+ # ColBERT retriever
90
def colbert_retriever():
    """Index the concatenated rag_data/ text with ColBERT v2 and return a retriever (k=3)."""
    corpus = docs_return(1)
    rag = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
    rag.index(
        collection=[corpus],
        index_name="ensemble_colbert",
        max_document_length=256,
        split_documents=True,
    )
    return rag.as_langchain_retriever(k=3)
100
+ return retriever
101
+
102
+ # Ensemble retriever
103
def ensemble_retriever(model_name="BAAI/bge-large-en-v1.5", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
    """Blend the ColBERT and Chroma retrievers with equal (0.5/0.5) weights."""
    colbert = colbert_retriever()
    chroma = retriever_chroma(False, model_name=model_name, splitter_type=splitter_type,
                              chunk_size=chunk_size, chunk_overlap=chunk_overlap,
                              separator=separator, max_tokens=max_tokens)
    return EnsembleRetriever(retrievers=[colbert, chroma], weights=[0.50, 0.50])
108
+
109
+ # Example usage:
110
+ # dat = ensemble_retriever(model_name="sentence-transformers/all-mpnet-base-v2", splitter_type='token', chunk_size=500)
111
+ # data = dat.invoke("What is KUET?")
112
+ # context = ""
113
+ # for x in data[:2]:
114
+ # context += (x.page_content) + "\n"
deploy/inference.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import pandas as pd
4
+ import transformers
5
+ from pynvml import *
6
+ import torch
7
+ from langchain import hub
8
+ from model_ret import zephyr_model,llama_model,mistral_model,phi_model,flant5_model
9
+ from langchain_core.output_parsers import StrOutputParser
10
+ from langchain_core.runnables import RunnablePassthrough
11
+ from create_retriever import ensemble_retriever
12
# HuggingFace model mapping: short UI names -> full HuggingFace repo ids.
hf_model_map = {
    "Zephyr": "HuggingFaceH4/zephyr-7b-beta",
    "Llama": "NousResearch/Meta-Llama-3-8B",
    "Mistral": "unsloth/mistral-7b-instruct-v0.3",
    "Phi": "microsoft/Phi-3-mini-4k-instruct",
    "Flant5": "google/flan-t5-base",
}
20
+
21
# Model chain class
class model_chain:
    """Bundle an LLM backend and the ensemble retriever into a RAG chain.

    ``model_name_local`` is expected as ``"<quantization>_<ModelName>"``
    (e.g. ``"4_Mistral"``); when ``use_local`` is False, ``model_name_online``
    is resolved through ``hf_model_map`` instead.
    """

    # Short name of the active model ("Zephyr", "Llama", ..., "Flant5").
    model_name = ""

    def __init__(self,
                 model_name_local,
                 model_name_online="Llama",
                 use_local=True,
                 embedding_name="BAAI/bge-base-en-v1.5",
                 splitter_type_dropdown="character",
                 chunk_size_slider=512,
                 chunk_overlap_slider=30,
                 separator_textbox="\n",
                 max_tokens_slider=2048) -> None:
        # BUG FIX: `quantization` was only assigned on the local path, so any
        # non-Flant5 online model raised NameError below. Default to 4-bit.
        quantization = "4"
        if use_local:
            quantization, self.model_name = model_name_local.split("_")[0], model_name_local.split("_")[1]
            model_name_temp = model_name_local
        else:
            self.model_name = model_name_online
            model_name_temp = hf_model_map[model_name_online]

        # Instantiate the selected LLM backend.
        # BUG FIX: the loader helpers return a (tokenizer, model, llm) triple;
        # previously the whole tuple was assigned to self.llm, which broke the
        # RAG chain composition below. Keep only the LangChain llm.
        if self.model_name == "Zephyr":
            self.llm = zephyr_model(model_name_temp, quantization, use_local=use_local)[-1]
        elif self.model_name == "Llama":
            self.llm = llama_model(model_name_temp, quantization, use_local=use_local)[-1]
        elif self.model_name == "Mistral":
            self.llm = mistral_model(model_name_temp, quantization, use_local=use_local)[-1]
        elif self.model_name == "Phi":
            self.llm = phi_model(model_name_temp, quantization, use_local=use_local)[-1]
        elif self.model_name == "Flant5":
            # Flan-T5 keeps direct tokenizer/model handles for ans_ret().
            self.tokenizer, self.model, self.llm = flant5_model(model_name_temp, use_local=use_local)

        # Creating the retriever
        self.retriever = ensemble_retriever(embedding_name,
                                            splitter_type=splitter_type_dropdown,
                                            chunk_size=chunk_size_slider,
                                            chunk_overlap=chunk_overlap_slider,
                                            separator=separator_textbox,
                                            max_tokens=max_tokens_slider)

        # Defining the RAG chain
        prompt = hub.pull("rlm/rag-prompt")
        self.rag_chain = (
            {"context": self.retriever | self.format_docs, "question": RunnablePassthrough()}
            | prompt
            | self.llm
            | StrOutputParser()
        )

    # Helper function to format documents
    def format_docs(self, docs):
        """Join the page contents of retrieved documents with blank lines."""
        return "\n\n".join(doc.page_content for doc in docs)

    # Retrieve RAG chain
    def rag_chain_ret(self):
        """Return the composed RAG chain."""
        return self.rag_chain

    # Answer retrieval function
    def ans_ret(self, inp, rag_chain):
        """Answer the user question `inp` with `rag_chain`.

        Flan-T5 is handled outside the chain: retrieve context, build a
        prompt, and generate directly with the seq2seq model.
        """
        if self.model_name == 'Flant5':
            data = self.retriever.invoke(inp)
            context = ""
            for x in data[:2]:
                context += x.page_content + "\n"
            # BUG FIX: use the user's actual question — previously a
            # hard-coded sample question was answered regardless of `inp`.
            prompt_text = f"""Please answer to this question using this context:\n{context}\n{inp}"""
            token_inputs = self.tokenizer(prompt_text, return_tensors="pt")
            outputs = self.model.generate(**token_inputs)
            answer = self.tokenizer.decode(outputs[0])
            from textwrap import fill
            return fill(answer, width=100)

        ans = rag_chain.invoke(inp)
        # BUG FIX: guard the split — an output without "Answer:" previously
        # raised IndexError. Fall back to the raw output instead.
        if "Answer:" in ans:
            ans = ans.split("Answer:", 1)[1]
        return ans
97
+
98
+ # def model_push(hf):
99
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
100
+ # if model_name=="Mistral":
101
+ # path="models/full_KUET_LLM_mistral"
102
+ # elif model_name=="Zepyhr":
103
+ # path="models/full_KUET_LLM_zepyhr"
104
+ # elif model_name=="Llama2":
105
+ # path="models/full_KUET_LLM_llama"
106
+ # tokenizer = AutoTokenizer.from_pretrained(path)
107
+ # model = AutoModelForCausalLM.from_pretrained(path,
108
+ # device_map='auto',
109
+ # torch_dtype=torch.float16,
110
+ # use_auth_token=True,
111
+ # load_in_8bit=True,
112
+ # # load_in_4bit=True
113
+ # )
114
+ # model.push_to_hub(repo_id=f"My_model",token=hf)
115
+ # tokenizer.push_to_hub(repo_id=f"My_model",token=hf)
116
+
117
+
118
+
119
+
deploy/main.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from utils import load_params_from_file
4
+ from inference import model_chain
5
+
6
# Lazily-constructed global RAG chain; built on the first chat message.
infer_ragchain = None

# Define the main interface logic
def echo(message, history, model_name_local, model_name_online,
         inf_checkbox, embedding_name, splitter_type_dropdown, chunk_size_slider,
         chunk_overlap_slider, separator_textbox, max_tokens_slider):
    """Gradio chat handler: build the model chain once, then answer `message`.

    All extra parameters come from the UI widgets registered as
    additional_inputs on the ChatInterface.
    """
    global infer_ragchain
    if infer_ragchain is None:
        gr.Info("Please wait!!! model is loading!!")
        if inf_checkbox:
            # BUG FIX: `gr.info` does not exist in the Gradio API; the modal
            # helper is `gr.Info` (capital I), which would have raised
            # AttributeError on the local-model path.
            gr.Info("local model is loading!!")
        infer_ragchain = model_chain(model_name_local, model_name_online,
                                     inf_checkbox, embedding_name, splitter_type_dropdown, chunk_size_slider,
                                     chunk_overlap_slider, separator_textbox, max_tokens_slider)
    rag_chain = infer_ragchain.rag_chain_ret()
    return infer_ragchain.ans_ret(message, rag_chain)
22
+
23
# Load saved parameters if available
saved_params = load_params_from_file()

def _param(key, fallback):
    """Return the saved value for `key` when params were loaded, else `fallback`."""
    return saved_params[key] if saved_params else fallback

# Set default values for the UI widgets.
default_embedding_name = _param('embedding_name', "BAAI/bge-base-en-v1.5")
default_splitter_type = _param('splitter_type_dropdown', "character")
default_chunk_size = _param('chunk_size_slider', 500)
default_chunk_overlap = _param('chunk_overlap_slider', 30)
default_separator = _param('separator_textbox', "\n")
default_max_tokens = _param('max_tokens_slider', 1000)
33
+
34
# Initialize the Gradio Interface
with gr.Blocks() as demo:
    with gr.Tab("Inference"):
        with gr.Row():
            # Embedding model used by the Chroma side of the ensemble retriever.
            embedding_name = gr.Dropdown(choices=["BAAI/bge-base-en-v1.5", "dunzhang/stella_en_1.5B_v5", "dunzhang/stella_en_400M_v5",
                                                  "nvidia/NV-Embed-v2", "Alibaba-NLP/gte-Qwen2-1.5B-instruct"],
                                         value=default_embedding_name, label="Select the Embedding Model")
            # Document-splitting strategy forwarded to the retriever builder.
            splitter_type_dropdown = gr.Dropdown(choices=["character", "recursive", "token"],
                                                 value=default_splitter_type, label="Splitter Type", interactive=True)

        # Chunking controls passed through `echo` into model_chain.
        chunk_size_slider = gr.Slider(minimum=100, maximum=2000, value=default_chunk_size, step=50, label="Chunk Size")
        chunk_overlap_slider = gr.Slider(minimum=0, maximum=500, value=default_chunk_overlap, step=10, label="Chunk Overlap", interactive=True)
        separator_textbox = gr.Textbox(value=default_separator, label="Separator (e.g., newline '\\n')", interactive=True)
        max_tokens_slider = gr.Slider(minimum=100, maximum=5000, value=default_max_tokens, step=100, label="Max Tokens", interactive=True)

        # Checked -> use a locally fine-tuned model from the models/ directory.
        inf_checkbox = gr.Checkbox(label="Do you want to use a fine-tuned model?")
        # Local-model dropdown starts hidden; revealed by the checkbox handler below.
        model_name_local = gr.Dropdown(visible=False)
        model_name_online = gr.Dropdown(choices=["Zephyr", "Llama", "Mistral", "Phi", "Flant5"],
                                        label="Select the LLM from Huggingface", visible=True)

        # Function to toggle model selection between local and online based on checkbox
        def model_online_local_show(inf_checkbox):
            if inf_checkbox:
                # Local mode: populate choices from the models/ directory.
                return [gr.Dropdown(choices=os.listdir("models"), label="Select the local LLM", visible=True),
                        gr.Dropdown(visible=False)]
            else:
                return [gr.Dropdown(visible=False),
                        gr.Dropdown(choices=["Zephyr", "Llama", "Mistral", "Phi", "Flant5"],
                                    label="Select the LLM from Huggingface", visible=True)]

        # Event listener to switch between local and online models
        inf_checkbox.change(model_online_local_show, [inf_checkbox], [model_name_local, model_name_online])

        # Chat interface; widget values are forwarded to `echo` as extra args.
        gr.ChatInterface(fn=echo,
                         additional_inputs=[model_name_local, model_name_online, inf_checkbox, embedding_name,
                                            splitter_type_dropdown, chunk_size_slider,
                                            chunk_overlap_slider, separator_textbox, max_tokens_slider],
                         title="Chatbot")

# Launch the demo
demo.launch()
deploy/model_ret.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, T5ForConditionalGeneration, pipeline
2
+ from langchain import HuggingFacePipeline
3
+ import os
4
+ import torch
5
+
6
def load_model_and_pipeline(model_info, quantization=4, is_t5=False, use_local=True):
    """Load tokenizer + model for `model_info` and wrap them in a LangChain LLM.

    Args:
        model_info: local path or HuggingFace repo id.
        quantization: 4 or 8 (int or str) — bitsandbytes quantization level.
        is_t5: load as a seq2seq (flan-T5) model instead of a causal LM.
        use_local: kept for signature compatibility; resolution of local vs
            online names is handled by the caller.

    Returns:
        (tokenizer, model, llm) where `llm` is a HuggingFacePipeline.
    """
    if is_t5:
        # BUG FIX: the old code first loaded (and discarded) a quantized
        # causal LM before loading T5, and then created a "text-generation"
        # pipeline, which rejects T5ForConditionalGeneration. Load the
        # seq2seq model directly and use the matching pipeline task.
        tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
        model = T5ForConditionalGeneration.from_pretrained(model_info)
        task = "text2text-generation"
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_info, use_auth_token=True)
        # BUG FIX: normalise quantization so int 8 and str "8" both select the
        # 8-bit path (the default is the *int* 4, but the old check compared
        # against the *string* "8").
        load_kwargs = {"load_in_8bit": True} if str(quantization) == "8" else {"load_in_4bit": True}
        model = AutoModelForCausalLM.from_pretrained(
            model_info,
            device_map='auto',
            torch_dtype=torch.float16,
            use_auth_token=True,
            **load_kwargs,
        )
        task = "text-generation"

    pipe = pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id
    )

    llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
    return tokenizer, model, llm
62
+
63
def zephyr_model(model_info, quantization, use_local=True):
    """Load a Zephyr causal LM; returns (tokenizer, model, llm)."""
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)

def llama_model(model_info, quantization, use_local=True):
    """Load a Llama causal LM; returns (tokenizer, model, llm)."""
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)

def mistral_model(model_info, quantization, use_local=True):
    """Load a Mistral causal LM; returns (tokenizer, model, llm)."""
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)

def phi_model(model_info, quantization, use_local=True):
    """Load a Phi causal LM; returns (tokenizer, model, llm)."""
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)

def flant5_model(model_info, use_local=True):
    """Load flan-T5 as a seq2seq model; returns (tokenizer, model, llm)."""
    return load_model_and_pipeline(model_info, is_t5=True, use_local=use_local)
77
+
78
+
79
+ import pandas as pd
80
+ from datasets import Dataset
81
+
82
def calculate_rag_metrics(model_ques_ans_gen, llm_model, embedding_model="BAAI/bge-base-en-v1.5"):
    """Evaluate RAG outputs with ragas.

    Args:
        model_ques_ans_gen: list of dicts with 'question', 'answer',
            'contexts' and 'ground_truths' keys.
        llm_model: LLM used by ragas as the judge model.
        embedding_model: embedding model passed to ragas.

    Returns:
        The ragas evaluation result; call ``.to_pandas()`` on it for a table.
    """
    from ragas import evaluate
    from ragas.metrics import (
        faithfulness, answer_correctness, answer_similarity,
        answer_relevancy, context_recall, context_precision,
    )

    # Create a dictionary from the model_ques_ans_gen list
    data_samples = {
        'question': [item['question'] for item in model_ques_ans_gen],
        'answer': [item['answer'] for item in model_ques_ans_gen],
        'contexts': [item['contexts'] for item in model_ques_ans_gen],
        'ground_truths': [item['ground_truths'] for item in model_ques_ans_gen]
    }

    # Convert the dictionary to a pandas DataFrame, then to a HF Dataset.
    rag_df = pd.DataFrame(data_samples)
    rag_eval_dataset = Dataset.from_pandas(rag_df)

    # BUG FIX: ragas expects metric *objects*, not their names as strings;
    # the imported metric objects were previously unused.
    metrics = [
        answer_correctness, answer_similarity,
        answer_relevancy, faithfulness,
        context_recall, context_precision,
    ]

    # Perform the evaluation using the provided LLM and embedding models.
    result = evaluate(
        rag_eval_dataset,
        metrics=metrics,
        llm=llm_model,
        embeddings=embedding_model,
    )
    # NOTE: the old code called result.to_pandas() and threw the frame away;
    # return the result and let callers convert it themselves.
    return result
deploy/params.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "M1", "embedding_name": "BAAI/bge-base-en-v1.5", "splitter_type_dropdown": "character", "chunk_size_slider": 500, "chunk_overlap_slider": 30, "separator_textbox": "\n", "max_tokens_slider": 1000}
embedding_tuner.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset, Dataset
2
+ from sentence_transformers import (
3
+ SentenceTransformer,
4
+ SentenceTransformerTrainer,
5
+ SentenceTransformerTrainingArguments,
6
+ )
7
+ from sentence_transformers.losses import (
8
+ MultipleNegativesRankingLoss,
9
+ OnlineContrastiveLoss,
10
+ CoSENTLoss,
11
+ GISTEmbedLoss,
12
+ TripletLoss,
13
+ )
14
+ import pandas as pd
15
+
16
+
17
class EmbeddingFinetuner:
    """
    A class for finetuning SentenceTransformer models on various loss functions.

    Supported losses and the column layout each expects in "data/emb_data.xlsx":

    - MultipleNegativesRankingLoss / TripletLoss:
        | anchor | positive | negative |
    - GISTEmbedLoss:
        | anchor | positive [| negative] |
    - OnlineContrastiveLoss:
        | sentence1 | sentence2 | label |   (label 1 = similar, 0 = dissimilar)
    - CoSENTLoss:
        | sentence1 | sentence2 | score |   (float similarity, e.g. 0..1)
    """

    # Losses this class knows how to build; checked by _load_data and
    # _get_loss_function.
    _SUPPORTED_LOSSES = (
        "MultipleNegativesRankingLoss",
        "OnlineContrastiveLoss",
        "CoSENTLoss",
        "GISTEmbedLoss",
        "TripletLoss",
    )

    def __init__(
        self,
        model_name="microsoft/mpnet-base",
        loss_function="MultipleNegativesRankingLoss",
        epochs=1,
        batch_size=16,
        test_size=0.1,
    ):
        """
        Initializes the EmbeddingFinetuner.

        Args:
            model_name (str): Name of the SentenceTransformer model to use.
            loss_function (str): Name of the loss function to use.
            epochs (int): Number of training epochs.
            batch_size (int): Batch size for training.
            test_size (float): Proportion of the dataset used for the dev split.
        """
        self.model_name = model_name
        self.loss_function = loss_function
        self.epochs = epochs
        self.batch_size = batch_size
        self.test_size = test_size

        self.model = SentenceTransformer(self.model_name)
        self.train_dataset, self.dev_dataset, self.test_dataset = self._load_data()
        self.loss = self._get_loss_function()

    def _load_data(self):
        """
        Load "data/emb_data.xlsx" and split it into train/dev datasets.

        The spreadsheet's columns must match the selected loss function (see
        the class docstring); the layout itself is not validated here. The
        original per-loss branches were five identical Dataset.from_pandas
        calls, so they are consolidated into one membership check.
        """
        if self.loss_function not in self._SUPPORTED_LOSSES:
            raise ValueError(f"Unsupported loss function: {self.loss_function}")

        # Plain string path — the original f-string had no placeholders.
        df = pd.read_excel("data/emb_data.xlsx")
        dataset = Dataset.from_pandas(df)

        # Split into train and dev; no held-out test set is created.
        train_dev_dataset = dataset.train_test_split(test_size=self.test_size)
        train_dataset = train_dev_dataset["train"]
        dev_dataset = train_dev_dataset["test"]
        test_dataset = None

        return train_dataset, dev_dataset, test_dataset

    def _get_loss_function(self):
        """
        Returns the selected loss function instance.
        """
        if self.loss_function == "MultipleNegativesRankingLoss":
            return MultipleNegativesRankingLoss(self.model)
        elif self.loss_function == "OnlineContrastiveLoss":
            return OnlineContrastiveLoss(self.model)
        elif self.loss_function == "CoSENTLoss":
            return CoSENTLoss(self.model)
        elif self.loss_function == "GISTEmbedLoss":
            # GISTEmbed additionally needs a small "guide" model.
            guide_model = SentenceTransformer("all-MiniLM-L6-v2")  # You can change this
            return GISTEmbedLoss(self.model, guide_model)
        elif self.loss_function == "TripletLoss":
            return TripletLoss(self.model)
        else:
            raise ValueError(f"Unsupported loss function: {self.loss_function}")

    def train(self):
        """
        Trains the SentenceTransformer model using the specified loss function
        and saves the result under models/.
        """
        args = SentenceTransformerTrainingArguments(
            output_dir=f"models/{self.model_name}-{self.loss_function}",
            num_train_epochs=self.epochs,
            per_device_train_batch_size=self.batch_size,
            per_device_eval_batch_size=self.batch_size,
            evaluation_strategy="epoch",
            # ... other training arguments as needed ...
        )

        trainer = SentenceTransformerTrainer(
            model=self.model,
            args=args,
            train_dataset=self.train_dataset,
            eval_dataset=self.dev_dataset,
            loss=self.loss,
        )
        trainer.train()

        # Save the trained model
        self.model.save_pretrained(
            f"models/emb-{self.model_name}-{self.loss_function}"
        )

        return True
+
202
+
fine_tune_file/finetune_file.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #code changed
2
+ import os
3
+ import torch
4
+ from datasets import load_dataset, Dataset
5
+ import pandas as pd
6
+ import transformers
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
+ from trl import SFTTrainer
9
+ import transformers
10
+ # from peft import AutoPeftModelForCausalLM
11
+ from transformers import GenerationConfig
12
+ from pynvml import *
13
+ import glob
14
class custom_model_trainer:
    """QLoRA finetuning template for a causal LM (Qwen2.5-0.5B-Instruct).

    Adjust the class-level hyperparameters and the chat template in
    formatted_text() for your own model/data.
    """

    # lr, epoch, batch_size, gradient_accumulation, quantization, lora_r,
    # lora_alpha, lora_dropout — set up the parameters according to your model.
    lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout = 5e-6, 3, 4, 4, 4, 16, 32, .05

    def formatted_text(self, x, tokenizer):
        """Render one (question, answer) row through the model's chat template."""
        temp = [
            {"role": "user", "content": """You are a helpful chatbot, help users by answering their queries.
            Question: """ + x["question"]},
            {"role": "assistant", "content": x["answer"]}
        ]
        return tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)

    def custom_model_finetune(self):
        """Run LoRA finetuning on data/finetune_data.xlsx and save a merged model."""
        base_model = 'Qwen/Qwen2.5-0.5B-Instruct'  # base model repo name on HuggingFace
        lora_output = f'models/q{self.quantization}_mymodel_lora'  # LoRA adapter output dir
        full_output = f'models/q{self.quantization}_mymodel_full'  # merged-model output dir
        DEVICE = 'cuda'
        tokenizer = AutoTokenizer.from_pretrained(base_model)
        tokenizer.padding_side = 'right'

        # Read the (question, answer) pairs and render them with the template.
        data_location = r"data/finetune_data.xlsx"  ## replace here
        data_df = pd.read_excel(data_location)
        data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x, tokenizer), axis=1)
        print(data_df.iloc[0])
        dataset = Dataset.from_pandas(data_df)

        # Set quantization config.
        # BUG FIX: the class default is the *int* 4, but the old check compared
        # against the *string* '8'; normalise via str() so both types work.
        if str(self.quantization) == '8':
            bnb_config = BitsAndBytesConfig(
                load_in_8bit=True,
            )
        else:
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16
            )
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            quantization_config=bnb_config,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
        )
        model.config.use_cache = False  # silence the warnings
        model.config.pretraining_tp = 1
        model.gradient_checkpointing_enable()

        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        tokenizer.padding_side = 'right'
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.add_eos_token = True

        # Set PEFT adapter config (16:32)
        from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

        config = LoraConfig(
            r=self.lora_r if self.lora_r else 16,
            lora_alpha=self.lora_alpha if self.lora_alpha else 32,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],  # all linear layers
            lora_dropout=self.lora_dropout if self.lora_dropout else 0.05,
            bias="none",
            task_type="CAUSAL_LM")

        # Stabilize output layer and layernorms.
        # BUG FIX: the second positional argument of
        # prepare_model_for_kbit_training is use_gradient_checkpointing (a
        # bool), not the quantization level; rely on its default instead of
        # passing `4`.
        model = prepare_model_for_kbit_training(model)
        # Set PEFT adapter on model (last step).
        model = get_peft_model(model, config)

        # Set Hyperparameters
        MAXLEN = 512
        BATCH_SIZE = self.batch_size if self.batch_size else 4
        GRAD_ACC = self.gradient_accumulation if self.gradient_accumulation else 4
        OPTIMIZER = 'paged_adamw_8bit'  # save memory
        LR = self.lr if self.lr else 5e-06  # slightly smaller than pretraining lr

        training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,
                                                         gradient_accumulation_steps=GRAD_ACC,
                                                         optim=OPTIMIZER,
                                                         learning_rate=LR,
                                                         fp16=True,  # consider compatibility when using bf16
                                                         logging_steps=10,
                                                         num_train_epochs=self.epoch if self.epoch else 2,
                                                         output_dir=lora_output,
                                                         remove_unused_columns=True,
                                                         )

        # Set collator
        data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

        # Setup trainer
        trainer = SFTTrainer(model=model,
                             train_dataset=dataset,
                             data_collator=data_collator,
                             args=training_config,
                             dataset_text_field="text",
                             # callbacks=[early_stop], need to learn, lora easily overfits
                             )

        trainer.train()
        trainer.save_model(lora_output)

        # Reload the base model, attach the trained adapter, merge and save.
        from peft import PeftConfig, PeftModel
        config = PeftConfig.from_pretrained(lora_output)
        model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
        model = PeftModel.from_pretrained(model, lora_output)
        tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        merged_model = model.merge_and_unload()

        merged_model.save_pretrained(full_output)
        tokenizer.save_pretrained(full_output)
        print("*"*10, ": Model is saved!!!")
fine_tune_file/flant5_finetune.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ import evaluate
3
+ import numpy as np
4
+ import pandas as pd
5
+ from datasets import load_dataset, Dataset
6
+ from transformers import T5Tokenizer, DataCollatorForSeq2Seq
7
+ from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
8
+ import os
9
+ from datetime import datetime
10
+ os.environ["WANDB_DISABLED"] = "true"
11
+
12
class flant5_trainer:
    """Finetunes google/flan-t5-base on (question, answer) pairs with ROUGE eval."""

    def __init__(self) -> None:
        # Load the tokenizer, model, and data collator
        MODEL_NAME = "google/flan-t5-base"
        self.tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
        self.model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)
        self.data_collator = DataCollatorForSeq2Seq(tokenizer=self.tokenizer, model=self.model)
        nltk.download("punkt", quiet=True)
        self.metric = evaluate.load("rouge")

    def preprocess_function(self, examples):
        """Add prefix to the sentences, tokenize the text, and set the labels"""
        # The "inputs" are the tokenized questions (with the task prefix):
        inputs = [self.prefix + doc for doc in examples["question"]]
        model_inputs = self.tokenizer(inputs, max_length=1024, truncation=True)

        # The "labels" are the tokenized answers:
        labels = self.tokenizer(text_target=examples["answer"],
                                max_length=1024,
                                truncation=True)

        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    def compute_metrics(self, eval_preds):
        """Compute ROUGE between decoded predictions and reference labels."""
        preds, labels = eval_preds

        # Decode preds and labels; -100 marks padded label positions.
        labels = np.where(labels != -100, labels, self.tokenizer.pad_token_id)
        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)

        # rougeLSum expects newline after each sentence
        decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
        decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]

        result = self.metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

        return result

    def flant5_finetune(self, lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout):
        """Train flan-T5 on data/finetune_data.xlsx and save it under models/.

        quantization / lora_* are accepted only for signature compatibility
        with the other trainers and are unused here.
        """
        data_location = r"data/finetune_data.xlsx"
        data_df = pd.read_excel(data_location)
        dataset = Dataset.from_pandas(data_df)
        # We prefix our tasks with "answer the question"
        self.prefix = "Please answer this question: "

        # Split, then tokenize both splits with the preprocessing function.
        dataset = dataset.train_test_split(test_size=0.1)
        tokenized_dataset = dataset.map(self.preprocess_function, batched=True)

        # Global Parameters
        PER_DEVICE_EVAL_BATCH = 4
        WEIGHT_DECAY = 0.01
        SAVE_TOTAL_LIM = 3

        # Set up training arguments
        training_args = Seq2SeqTrainingArguments(
            output_dir="./results",
            evaluation_strategy="epoch",
            learning_rate=lr,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
            weight_decay=WEIGHT_DECAY,
            save_total_limit=SAVE_TOTAL_LIM,
            num_train_epochs=epoch,
            predict_with_generate=True,
            push_to_hub=False,
            gradient_accumulation_steps=gradient_accumulation
        )
        trainer = Seq2SeqTrainer(
            model=self.model,
            args=training_args,
            train_dataset=tokenized_dataset["train"],
            eval_dataset=tokenized_dataset["test"],
            tokenizer=self.tokenizer,
            data_collator=self.data_collator,
            compute_metrics=self.compute_metrics
        )
        trainer.train()

        current_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        full_output = f'models/0_flant5_{current_time}'
        self.model.save_pretrained(full_output)
        # BUG FIX: the model was saved twice and the tokenizer never saved,
        # so the output directory could not be reloaded as a full checkpoint.
        self.tokenizer.save_pretrained(full_output)
        print("*"*10, ": Model is saved!!!")
fine_tune_file/llama_finetune.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from datasets import load_dataset, Dataset
4
+ import pandas as pd
5
+ import transformers
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
7
+ from trl import SFTTrainer
8
+ import transformers
9
+ # from peft import AutoPeftModelForCausalLM
10
+ from transformers import GenerationConfig
11
+ from pynvml import *
12
+ import glob
13
class llama_trainer:
    """QLoRA finetuning for a Llama-3 base model on (question, answer) pairs."""

    def llama_model(self, lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout):
        """Finetune with LoRA, then merge the adapter and save the full model.

        Args:
            lr: learning rate (falls back to 5e-06 when falsy).
            epoch: training epochs (falls back to 2 when falsy).
            batch_size / gradient_accumulation: per-device batch and grad-accum
                (fall back to 4/4 when falsy).
            quantization: int 4 selects 4-bit NF4; anything else selects 8-bit.
            lora_r / lora_alpha / lora_dropout: LoRA hyperparameters.
        """
        # base_model = "NousResearch/Llama-2-7b-chat-hf"
        base_model = "NousResearch/Meta-Llama-3-8B"
        # base_model = "unsloth/Meta-Llama-3.1-8B-Instruct"
        from datetime import datetime
        lora_output = f'models/{quantization}_Llama_lora_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
        full_output = f'models/{quantization}_Llama_full_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
        DEVICE = 'cuda'

        # Set quantization config. Build exactly one config — the old code
        # always constructed the 8-bit config and then discarded it for 4-bit.
        if quantization == 4:
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16
            )
        else:
            bnb_config = BitsAndBytesConfig(
                load_in_8bit=True,
            )
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            quantization_config=bnb_config,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
        )
        model.config.use_cache = False  # silence the warnings
        model.config.pretraining_tp = 1
        model.gradient_checkpointing_enable()

        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        tokenizer.padding_side = 'right'
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.add_eos_token = True
        # (removed the bare no-op `tokenizer.add_eos_token` expression)

        data_location = r"data/finetune_data.xlsx"  ## replace here
        data_df = pd.read_excel(data_location)

        # Render each row into an instruction/response training string.
        for i in range(len(data_df)):
            data_df.loc[i, 'Text'] = "### Instruction:" + str(data_df.loc[i, 'question']) + "### Response:" + str(data_df.loc[i, 'answer'])

        dataset = Dataset.from_pandas(data_df)

        # Set PEFT adapter config (16:32)
        from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

        config = LoraConfig(
            r=lora_r if lora_r else 16,
            lora_alpha=lora_alpha if lora_alpha else 32,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],  # target all the linear layers for full finetuning
            lora_dropout=lora_dropout if lora_dropout else 0.05,
            bias="none",
            task_type="CAUSAL_LM")

        # stabilize output layer and layernorms
        model = prepare_model_for_kbit_training(model)
        # Set PEFT adapter on model (Last step)
        model = get_peft_model(model, config)

        # Set Hyperparameters
        MAXLEN = 512
        BATCH_SIZE = batch_size if batch_size else 4
        GRAD_ACC = gradient_accumulation if gradient_accumulation else 4
        OPTIMIZER = 'paged_adamw_8bit'  # save memory
        LR = lr if lr else 5e-06  # slightly smaller than pretraining lr

        # Set training config
        training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,
                                                         gradient_accumulation_steps=GRAD_ACC,
                                                         optim=OPTIMIZER,
                                                         learning_rate=LR,
                                                         fp16=True,  # consider compatibility when using bf16
                                                         logging_steps=10,
                                                         num_train_epochs=epoch if epoch else 2,
                                                         output_dir=lora_output,
                                                         remove_unused_columns=True,
                                                         )

        # Set collator
        data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

        # Setup trainer
        trainer = SFTTrainer(model=model,
                             train_dataset=dataset,
                             data_collator=data_collator,
                             args=training_config,
                             dataset_text_field="Text",
                             # callbacks=[early_stop], need to learn, lora easily overfits
                             )

        trainer.train()
        trainer.save_model(lora_output)

        # Reload the base model, attach the trained adapter, merge and save.
        from peft import PeftConfig, PeftModel
        config = PeftConfig.from_pretrained(lora_output)
        model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
        model = PeftModel.from_pretrained(model, lora_output)
        tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        merged_model = model.merge_and_unload()

        merged_model.save_pretrained(full_output)
        tokenizer.save_pretrained(full_output)
        print("*"*10, ": Model is saved!!!")
130
+
131
+
132
if __name__ == "__main__":
    # Run a default finetune only when executed as a script — previously
    # training kicked off as a side effect of importing this module.
    lm = llama_trainer()
    lm.llama_model(5e-6, 2, 4, 4, 8, 16, 32, .05)
134
+
135
+
136
+
fine_tune_file/mistral_finetune.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %% Saved
2
+ import os
3
+ import torch
4
+ from datasets import load_dataset, Dataset
5
+ import pandas as pd
6
+ import transformers
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
+ from trl import SFTTrainer
9
+ import transformers
10
+ # from peft import AutoPeftModelForCausalLM
11
+ from transformers import GenerationConfig
12
+ from pynvml import *
13
+ import glob
14
+ class mistral_trainer:
15
+ def formatted_text(self,x,tokenizer):
16
+ temp = [
17
+ # {"role": "system", "content": "Answer as a medical assistant. Respond concisely."},
18
+ {"role": "user", "content": """You are a helpful chatbot, help users by answering their queries.
19
+ Question: """ + x["question"]},
20
+ {"role": "assistant", "content": x["answer"]}
21
+ ]
22
+ return tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)
23
+ def mistral_finetune(self,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout):
24
+ # base_model = "mistralai/Mistral-7B-Instruct-v0.2"
25
+ base_model="unsloth/mistral-7b-instruct-v0.3"
26
+ from datetime import datetime
27
+ lora_output = f'models/{quantization}_Mistral_lora_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
28
+ full_output = f'models/{quantization}_Mistral_full_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
29
+ DEVICE = 'cuda'
30
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
31
+ tokenizer.padding_side = 'right'
32
+ ### read csv with Prompt, Answer pair
33
+ data_location = r"data/finetune_data.xlsx" ## replace here
34
+ data_df=pd.read_excel(data_location)
35
+ ### set formatting
36
+ data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x,tokenizer), axis=1) ## replace Prompt and Answer if collected dataset has different column names
37
+ print(data_df.iloc[0])
38
+ dataset = Dataset.from_pandas(data_df)
39
+ # set quantization config
40
+ bnb_config = BitsAndBytesConfig(
41
+ load_in_8bit= True,
42
+ )
43
+ if quantization == 4:
44
+ print("*"*10,": 4 bit quantization")
45
+ bnb_config = BitsAndBytesConfig(
46
+ load_in_4bit= True,
47
+ bnb_4bit_use_double_quant=True,
48
+ bnb_4bit_quant_type="nf4",
49
+ bnb_4bit_compute_dtype=torch.bfloat16
50
+ )
51
+ model = AutoModelForCausalLM.from_pretrained(
52
+ base_model,
53
+ quantization_config=bnb_config,
54
+ torch_dtype=torch.bfloat16,
55
+ device_map="auto",
56
+ trust_remote_code=True,
57
+ )
58
+ model.config.use_cache = False # silence the warnings
59
+ model.config.pretraining_tp = 1
60
+ model.gradient_checkpointing_enable()
61
+
62
+ tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
63
+ tokenizer.padding_side = 'right'
64
+ tokenizer.pad_token = tokenizer.eos_token
65
+ tokenizer.add_eos_token = True
66
+ tokenizer.add_bos_token, tokenizer.add_eos_token
67
+
68
+ # Set PEFT adapter config (16:32)
69
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
70
+
71
+ # target modules are currently selected for zephyr base model
72
+ config = LoraConfig(
73
+ r= lora_r if lora_r else 16,
74
+ lora_alpha= lora_alpha if lora_alpha else 32,
75
+ target_modules=["q_proj", "v_proj","k_proj","o_proj","gate_proj","up_proj","down_proj"], # target all the linear layers for full finetuning
76
+ lora_dropout= lora_dropout if lora_dropout else 0.05,
77
+ bias="none",
78
+ task_type="CAUSAL_LM")
79
+
80
+ # stabilize output layer and layernorms
81
+ model = prepare_model_for_kbit_training(model)
82
+ # Set PEFT adapter on model (Last step)
83
+ model = get_peft_model(model, config)
84
+
85
+ # Set Hyperparameters
86
+ MAXLEN=512
87
+ BATCH_SIZE = batch_size if batch_size else 4
88
+ GRAD_ACC = gradient_accumulation if gradient_accumulation else 4
89
+ OPTIMIZER ='paged_adamw_8bit' # save memory
90
+ LR=lr if lr else 5e-06 # slightly smaller than pretraining lr | and close to LoRA standard
91
+
92
+ training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,
93
+ gradient_accumulation_steps=GRAD_ACC,
94
+ optim=OPTIMIZER,
95
+ learning_rate=LR,
96
+ fp16=True, # consider compatibility when using bf16
97
+ logging_steps=10,
98
+ num_train_epochs = epoch if epoch else 2,
99
+ output_dir=lora_output,
100
+ remove_unused_columns=True,
101
+ )
102
+
103
+ # Set collator
104
+ data_collator = transformers.DataCollatorForLanguageModeling(tokenizer,mlm=False)
105
+
106
+ # Setup trainer
107
+ trainer = SFTTrainer(model=model,
108
+ train_dataset=dataset,
109
+ data_collator=data_collator,
110
+ args=training_config,
111
+ dataset_text_field="text",
112
+ # callbacks=[early_stop], need to learn, lora easily overfits
113
+ )
114
+
115
+ trainer.train()
116
+ print("*"*10,": Finetune ended!!!!")
117
+ trainer.save_model(lora_output)
118
+
119
+ # Get peft config
120
+ from peft import PeftConfig
121
+ config = PeftConfig.from_pretrained(lora_output)
122
+
123
+ model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
124
+
125
+ # Load the Lora model
126
+ from peft import PeftModel
127
+ model = PeftModel.from_pretrained(model, lora_output)
128
+
129
+ # Get tokenizer
130
+ tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
131
+ merged_model = model.merge_and_unload()
132
+
133
+ merged_model.save_pretrained(full_output)
134
+ tokenizer.save_pretrained(full_output)
135
+ print("*"*10,": Model is saved!!!")
136
+
137
+ mis=mistral_trainer()
138
+ mis.mistral_finetune(5e-6,2,4,4,8,16,32,.05)
fine_tune_file/modular_finetune.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from datetime import datetime
4
+ from datasets import Dataset
5
+ import pandas as pd
6
+ import transformers
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, T5Tokenizer, T5ForConditionalGeneration
8
+ from trl import SFTTrainer
9
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftConfig, PeftModel
10
+ import os
11
+ # cache_folder="/mnt/FA00A16100A1259B/shakib/model_cache"
12
+ # os.environ['HF_HOME'] = cache_folder
13
+ # os.environ['HF_DATASETS_CACHE'] = cache_folder
14
+ # os.environ['TRANSFORMERS_CACHE'] = cache_folder
15
+ # print("*"*20)
16
+ # print(os.environ['HF_HOME'])
17
+ # print(os.environ['HF_DATASETS_CACHE'])
18
+ # print(os.environ['TRANSFORMERS_CACHE'])
19
+ # print(os.path.isdir(cache_folder))
20
+ # assert(False)
21
+ class BaseTrainer:
22
+ def __init__(self, model_name, base_model):
23
+ self.model_name = model_name
24
+ self.base_model = base_model
25
+ self.tokenizer = self.load_tokenizer()
26
+ self.model = self.load_model()
27
+
28
+ def load_tokenizer(self):
29
+ tokenizer = AutoTokenizer.from_pretrained(self.base_model, trust_remote_code=True)
30
+ tokenizer.padding_side = 'right'
31
+ tokenizer.pad_token = tokenizer.eos_token
32
+ return tokenizer
33
+
34
+ def load_model(self):
35
+ raise NotImplementedError("Subclasses must implement load_model method")
36
+
37
+ def preprocess_function(self, examples):
38
+ raise NotImplementedError("Subclasses must implement preprocess_function method")
39
+
40
+ def formatted_text(self, x):
41
+ self.tokenizer.chat_template = {
42
+ "role": "user",
43
+ "content": "You are a helpful chatbot. Question: {question}\nAnswer: {answer}"
44
+ }
45
+ temp = [
46
+ {"role": "user", "content": f"You are a helpful chatbot, help users by answering their queries.\nQuestion: {x['question']}"},
47
+ {"role": "assistant", "content": x["answer"]}
48
+ ]
49
+ return self.tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False,chat_template='content')
50
+
51
+ def train(self, lr, epoch, batch_size, gradient_accumulation, quantization, lora_r, lora_alpha, lora_dropout):
52
+ csv_file = "data/finetune_data.csv"
53
+ xlsx_file = "data/finetune_data.xlsx"
54
+ if os.path.exists(csv_file):
55
+ data_df = pd.read_csv(csv_file)
56
+ print("Reading CSV file...")
57
+ elif os.path.exists(xlsx_file):
58
+ data_df = pd.read_excel(xlsx_file)
59
+ print("CSV file not found, reading Excel file...")
60
+
61
+ data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x), axis=1)
62
+ dataset = Dataset.from_pandas(data_df)
63
+
64
+ lora_output = f'models/{quantization}_{self.model_name}_lora_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
65
+ full_output = f'models/{quantization}_{self.model_name}_full_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
66
+
67
+ config = LoraConfig(
68
+ r=lora_r or 16,
69
+ lora_alpha=lora_alpha or 32,
70
+ target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
71
+ lora_dropout=lora_dropout or 0.05,
72
+ bias="none",
73
+ task_type="CAUSAL_LM"
74
+ )
75
+
76
+ self.model = prepare_model_for_kbit_training(self.model)
77
+ self.model = get_peft_model(self.model, config)
78
+
79
+ training_args = transformers.TrainingArguments(
80
+ per_device_train_batch_size=batch_size or 4,
81
+ gradient_accumulation_steps=gradient_accumulation or 4,
82
+ optim='paged_adamw_8bit',
83
+ learning_rate=lr or 5e-6,
84
+ fp16=True,
85
+ logging_steps=10,
86
+ num_train_epochs=epoch or 2,
87
+ output_dir=lora_output,
88
+ remove_unused_columns=True,
89
+ )
90
+
91
+ data_collator = transformers.DataCollatorForLanguageModeling(self.tokenizer, mlm=False)
92
+
93
+ trainer = SFTTrainer(
94
+ model=self.model,
95
+ train_dataset=dataset,
96
+ data_collator=data_collator,
97
+ args=training_args,
98
+ dataset_text_field="text",
99
+ )
100
+
101
+ trainer.train()
102
+ trainer.save_model(lora_output)
103
+
104
+ config = PeftConfig.from_pretrained(lora_output)
105
+ model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
106
+ model = PeftModel.from_pretrained(model, lora_output)
107
+
108
+ merged_model = model.merge_and_unload()
109
+ merged_model.save_pretrained(full_output)
110
+ self.tokenizer.save_pretrained(full_output)
111
+ print("*" * 10, ": Model is saved!!!")
112
+
113
+ class LlamaTrainer(BaseTrainer):
114
+ def load_model(self):
115
+ bnb_config = BitsAndBytesConfig(
116
+ load_in_4bit=True,
117
+ bnb_4bit_use_double_quant=True,
118
+ bnb_4bit_quant_type="nf4",
119
+ bnb_4bit_compute_dtype=torch.bfloat16
120
+ )
121
+ model = AutoModelForCausalLM.from_pretrained(
122
+ self.base_model,
123
+ quantization_config=bnb_config,
124
+ torch_dtype=torch.bfloat16,
125
+ device_map="auto",
126
+ trust_remote_code=True,
127
+ # cache_dir=cache_folder
128
+ )
129
+ model.config.use_cache = False
130
+ model.config.pretraining_tp = 1
131
+ model.gradient_checkpointing_enable()
132
+ return model
133
+
134
+ class MistralTrainer(BaseTrainer):
135
+ def load_model(self):
136
+ return LlamaTrainer.load_model(self)
137
+
138
+ class PhiTrainer(BaseTrainer):
139
+ def load_model(self):
140
+ return LlamaTrainer.load_model(self)
141
+
142
+ class ZephyrTrainer(BaseTrainer):
143
+ def load_model(self):
144
+ return LlamaTrainer.load_model(self)
145
+
146
+ class FlanT5Trainer(BaseTrainer):
147
+ def load_tokenizer(self):
148
+ tokenizer = T5Tokenizer.from_pretrained(self.base_model)
149
+ tokenizer.padding_side = 'right'
150
+ return tokenizer
151
+
152
+ def load_model(self):
153
+ model = T5ForConditionalGeneration.from_pretrained(self.base_model)
154
+ return model
155
+
156
+ def preprocess_function(self, examples):
157
+ prefix = "Please answer this question: "
158
+ inputs = [prefix + doc for doc in examples["question"]]
159
+ model_inputs = self.tokenizer(inputs, max_length=1024, truncation=True)
160
+ labels = self.tokenizer(text_target=examples["answer"], max_length=1024, truncation=True)
161
+ model_inputs["labels"] = labels["input_ids"]
162
+ return model_inputs
163
+
164
+ def get_trainer(model_name):
165
+ model_map = {
166
+ "Llama": ("NousResearch/Meta-Llama-3-8B", LlamaTrainer),
167
+ "Mistral": ("unsloth/mistral-7b-instruct-v0.3", MistralTrainer),
168
+ "Phi": ("microsoft/Phi-3-mini-4k-instruct", PhiTrainer),
169
+ "Zephyr": ("HuggingFaceH4/zephyr-7b-beta", ZephyrTrainer),
170
+ "Flant5": ("google/flan-t5-base", FlanT5Trainer),
171
+ }
172
+
173
+ base_model, trainer_class = model_map[model_name]
174
+ if not base_model or not trainer_class:
175
+ raise ValueError(f"Unsupported model: {model_name}")
176
+
177
+ return trainer_class(model_name, base_model)
fine_tune_file/phi_finetune.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %% Saved
2
+ import os
3
+ import torch
4
+ from datasets import load_dataset, Dataset
5
+ import pandas as pd
6
+ import transformers
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
+ from trl import SFTTrainer
9
+ import transformers
10
+ # from peft import AutoPeftModelForCausalLM
11
+ from transformers import GenerationConfig
12
+ from pynvml import *
13
+ import glob
14
+ class phi_trainer:
15
+ def formatted_text(self,x,tokenizer):
16
+ temp = [
17
+ # {"role": "system", "content": "Answer as a medical assistant. Respond concisely."},
18
+ {"role": "user", "content": """You are a helpful chatbot, help users by answering their queries.
19
+ Question: """ + x["question"]},
20
+ {"role": "assistant", "content": x["answer"]}
21
+ ]
22
+ return tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)
23
+ def phi_finetune(self,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout):
24
+ base_model = "microsoft/Phi-3-mini-4k-instruct"
25
+ from datetime import datetime
26
+ lora_output = f'models/{quantization}_Phi_lora_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
27
+ full_output = f'models/{quantization}_Phi_full_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
28
+ DEVICE = 'cuda'
29
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
30
+ tokenizer.padding_side = 'right'
31
+ ### read csv with Prompt, Answer pair
32
+ data_location = r"data/finetune_data.xlsx" ## replace here
33
+ data_df=pd.read_excel( data_location )
34
+ ### set formatting
35
+ data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x,tokenizer), axis=1) ## replace Prompt and Answer if collected dataset has different column names
36
+ print(data_df.iloc[0])
37
+ dataset = Dataset.from_pandas(data_df)
38
+ # set quantization config
39
+ bnb_config = BitsAndBytesConfig(
40
+ load_in_8bit= True,
41
+ )
42
+ if quantization == 4:
43
+ print("*"*10,": 4 bit quantization")
44
+ bnb_config = BitsAndBytesConfig(
45
+ load_in_4bit= True,
46
+ bnb_4bit_use_double_quant=True,
47
+ bnb_4bit_quant_type="nf4",
48
+ bnb_4bit_compute_dtype=torch.bfloat16
49
+ )
50
+ model = AutoModelForCausalLM.from_pretrained(
51
+ base_model,
52
+ quantization_config=bnb_config,
53
+ torch_dtype=torch.bfloat16,
54
+ device_map="auto",
55
+ trust_remote_code=True,
56
+ )
57
+ model.config.use_cache = False # silence the warnings
58
+ model.config.pretraining_tp = 1
59
+ model.gradient_checkpointing_enable()
60
+
61
+ tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
62
+ tokenizer.padding_side = 'right'
63
+ tokenizer.pad_token = tokenizer.eos_token
64
+
65
+
66
+ # Set PEFT adapter config (16:32)
67
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
68
+
69
+ # target modules are currently selected for zephyr base model
70
+ config = LoraConfig(
71
+ r= lora_r if lora_r else 16,
72
+ lora_alpha= lora_alpha if lora_alpha else 32,
73
+ target_modules=["q_proj", "v_proj","k_proj","o_proj","gate_proj","up_proj","down_proj"], # target all the linear layers for full finetuning
74
+ lora_dropout= lora_dropout if lora_dropout else 0.05,
75
+ bias="none",
76
+ task_type="CAUSAL_LM")
77
+
78
+ # stabilize output layer and layernorms
79
+ model = prepare_model_for_kbit_training(model)
80
+ # Set PEFT adapter on model (Last step)
81
+ model = get_peft_model(model, config)
82
+
83
+ # Set Hyperparameters
84
+ MAXLEN=512
85
+ BATCH_SIZE = batch_size if batch_size else 4
86
+ GRAD_ACC = gradient_accumulation if gradient_accumulation else 4
87
+ OPTIMIZER ='paged_adamw_8bit' # save memory
88
+ LR=lr if lr else 5e-06 # slightly smaller than pretraining lr | and close to LoRA standard
89
+
90
+ training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,
91
+ gradient_accumulation_steps=GRAD_ACC,
92
+ optim=OPTIMIZER,
93
+ learning_rate=LR,
94
+ fp16=True, # consider compatibility when using bf16
95
+ logging_steps=10,
96
+ num_train_epochs = epoch if epoch else 2,
97
+ output_dir=lora_output,
98
+ remove_unused_columns=True,
99
+ )
100
+
101
+ # Set collator
102
+ data_collator = transformers.DataCollatorForLanguageModeling(tokenizer,mlm=False)
103
+
104
+ # Setup trainer
105
+ trainer = SFTTrainer(model=model,
106
+ train_dataset=dataset,
107
+ data_collator=data_collator,
108
+ args=training_config,
109
+ dataset_text_field="text",
110
+ # callbacks=[early_stop], need to learn, lora easily overfits
111
+ )
112
+
113
+ trainer.train()
114
+ print("*"*10,": Finetune ended!!!!")
115
+ trainer.save_model(lora_output)
116
+
117
+ # Get peft config
118
+ from peft import PeftConfig
119
+ config = PeftConfig.from_pretrained(lora_output)
120
+
121
+ model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,trust_remote_code=True)
122
+
123
+ # tokenizer = transformers.AutoTokenizer.from_pretrained(base_model)
124
+
125
+ # Load the Lora model
126
+ from peft import PeftModel
127
+ model = PeftModel.from_pretrained(model, lora_output)
128
+
129
+ # Get tokenizer
130
+ tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
131
+ merged_model = model.merge_and_unload()
132
+
133
+ merged_model.save_pretrained(full_output)
134
+ tokenizer.save_pretrained(full_output)
135
+ print("*"*10,": Model is saved!!!")
fine_tune_file/zepyhr_finetune.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #code changed
2
+ import os
3
+ import torch
4
+ from datasets import load_dataset, Dataset
5
+ import pandas as pd
6
+ import transformers
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
+ from trl import SFTTrainer
9
+ import transformers
10
+ # from peft import AutoPeftModelForCausalLM
11
+ from transformers import GenerationConfig
12
+ from pynvml import *
13
+ import glob
14
+ class zephyr_trainer:
15
+ def formatted_text(self,x,tokenizer):
16
+ temp = [
17
+ # {"role": "system", "content": "Answer as a medical assistant. Respond concisely."},
18
+ {"role": "user", "content": """You are a helpful chatbot, help users by answering their queries.
19
+ Question: """ + x["question"]},
20
+ {"role": "assistant", "content": x["answer"]}
21
+ ]
22
+ return tokenizer.apply_chat_template(temp, add_generation_prompt=False, tokenize=False)
23
+ def zepyhr_model(self,lr,epoch,batch_size,gradient_accumulation,quantization,lora_r,lora_alpha,lora_dropout):
24
+ base_model = 'HuggingFaceH4/zephyr-7b-beta'
25
+ from datetime import datetime
26
+ lora_output = f'models/{quantization}_Zepyhr_lora_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
27
+ full_output = f'models/{quantization}_Zepyhr_full_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
28
+ DEVICE = 'cuda'
29
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
30
+ tokenizer.padding_side = 'right'
31
+ ### read csv with Prompt, Answer pair
32
+ data_location = r"data/finetune_data.xlsx" ## replace here
33
+ data_df=pd.read_excel( data_location )
34
+ ### set formatting
35
+ data_df["text"] = data_df[["question", "answer"]].apply(lambda x: self.formatted_text(x,tokenizer), axis=1) ## replace Prompt and Answer if collected dataset has different column names
36
+ print(data_df.iloc[0])
37
+ dataset = Dataset.from_pandas(data_df)
38
+
39
+
40
+ # set quantization config
41
+ bnb_config = BitsAndBytesConfig(
42
+ load_in_8bit= True,
43
+ )
44
+ if quantization == 4:
45
+ bnb_config = BitsAndBytesConfig(
46
+ load_in_4bit= True,
47
+ bnb_4bit_use_double_quant=True,
48
+ bnb_4bit_quant_type="nf4",
49
+ bnb_4bit_compute_dtype=torch.bfloat16
50
+ )
51
+ model = AutoModelForCausalLM.from_pretrained(
52
+ base_model,
53
+ quantization_config=bnb_config,
54
+ torch_dtype=torch.bfloat16,
55
+ device_map="auto",
56
+ trust_remote_code=True,
57
+ )
58
+ model.config.use_cache = False # silence the warnings
59
+ model.config.pretraining_tp = 1
60
+ model.gradient_checkpointing_enable()
61
+
62
+ tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
63
+ tokenizer.padding_side = 'right'
64
+ tokenizer.pad_token = tokenizer.eos_token
65
+ tokenizer.add_eos_token = True
66
+ tokenizer.add_bos_token, tokenizer.add_eos_token
67
+
68
+ # Set PEFT adapter config (16:32)
69
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
70
+
71
+ # target modules are currently selected for zephyr base model
72
+ config = LoraConfig(
73
+ r= lora_r if lora_r else 16,
74
+ lora_alpha= lora_alpha if lora_alpha else 32,
75
+ target_modules=["q_proj", "v_proj","k_proj","o_proj","gate_proj","up_proj","down_proj"], # target all the linear layers for full finetuning
76
+ lora_dropout= lora_dropout if lora_dropout else 0.05,
77
+ bias="none",
78
+ task_type="CAUSAL_LM")
79
+
80
+ # stabilize output layer and layernorms
81
+ model = prepare_model_for_kbit_training(model)
82
+ # Set PEFT adapter on model (Last step)
83
+ model = get_peft_model(model, config)
84
+
85
+ # Set Hyperparameters
86
+ MAXLEN=512
87
+ BATCH_SIZE = batch_size if batch_size else 4
88
+ GRAD_ACC = gradient_accumulation if gradient_accumulation else 4
89
+ OPTIMIZER ='paged_adamw_8bit' # save memory
90
+ LR=lr if lr else 5e-06 # slightly smaller than pretraining lr | and close to LoRA standard
91
+
92
+
93
+ training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,
94
+ gradient_accumulation_steps=GRAD_ACC,
95
+ optim=OPTIMIZER,
96
+ learning_rate=LR,
97
+ fp16=True, # consider compatibility when using bf16
98
+ logging_steps=10,
99
+ num_train_epochs = epoch if epoch else 2,
100
+ output_dir=lora_output,
101
+ remove_unused_columns=True,
102
+ )
103
+
104
+ # Set collator
105
+ data_collator = transformers.DataCollatorForLanguageModeling(tokenizer,mlm=False)
106
+
107
+ # Setup trainer
108
+ trainer = SFTTrainer(model=model,
109
+ train_dataset=dataset,
110
+ data_collator=data_collator,
111
+ args=training_config,
112
+ dataset_text_field="text",
113
+ # callbacks=[early_stop], need to learn, lora easily overfits
114
+ )
115
+
116
+ trainer.train()
117
+ print("*"*10,": Finetune ended!!!!")
118
+ trainer.save_model(lora_output)
119
+
120
+ # Get peft config
121
+ from peft import PeftConfig
122
+ config = PeftConfig.from_pretrained(lora_output)
123
+
124
+ model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
125
+
126
+ # Load the Lora model
127
+ from peft import PeftModel
128
+ model = PeftModel.from_pretrained(model, lora_output)
129
+
130
+ # Get tokenizer
131
+ tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
132
+ merged_model = model.merge_and_unload()
133
+
134
+ merged_model.save_pretrained(full_output)
135
+ tokenizer.save_pretrained(full_output)
136
+ print("*"*10,": Model is saved!!!")
inference.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import pandas as pd
4
+ import transformers
5
+ from pynvml import *
6
+ import torch
7
+ from langchain import hub
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.runnables import RunnablePassthrough
10
+ from model_ret import load_model_and_pipeline
11
+ from create_retriever import retriever_chroma
12
+
13
+ # Model chain class
14
+ class model_chain:
15
+ model_name = ""
16
+
17
+ def __init__(self,
18
+ model_name_local,
19
+ model_name_online="Llama",
20
+ use_online=True,
21
+ embedding_name="BAAI/bge-base-en-v1.5",
22
+ splitter_type_dropdown="character",
23
+ chunk_size_slider=512,
24
+ chunk_overlap_slider=30,
25
+ separator_textbox="\n",
26
+ max_tokens_slider=2048) -> None:
27
+ if os.path.exists(f"models//{model_name_local}") and len(os.listdir(f"models//{model_name_local}")):
28
+ import gradio as gr
29
+ gr.Info("Model *()* from online!!")
30
+ self.model_name = model_name_local
31
+ else:
32
+ self.model_name = model_name_online
33
+
34
+ self.tokenizer, self.model, self.llm = load_model_and_pipeline(self.model_name)
35
+ # Creating the retriever
36
+ # self.retriever = ensemble_retriever(embedding_name,
37
+ # splitter_type=splitter_type_dropdown,
38
+ # chunk_size=chunk_size_slider,
39
+ # chunk_overlap=chunk_overlap_slider,
40
+ # separator=separator_textbox,
41
+ # max_tokens=max_tokens_slider)
42
+ self.retriever = retriever_chroma(False, embedding_name, splitter_type_dropdown,
43
+ chunk_size_slider, chunk_size_slider,
44
+ separator_textbox, max_tokens_slider)
45
+
46
+ # Defining the RAG chain
47
+ prompt = hub.pull("rlm/rag-prompt")
48
+ self.rag_chain = (
49
+ {"context": self.retriever | self.format_docs, "question": RunnablePassthrough()}
50
+ | prompt
51
+ | self.llm
52
+ | StrOutputParser()
53
+ )
54
+
55
+ # Helper function to format documents
56
+ def format_docs(self, docs):
57
+ return "\n\n".join(doc.page_content for doc in docs)
58
+
59
+ # Retrieve RAG chain
60
+ def rag_chain_ret(self):
61
+ return self.rag_chain
62
+
63
+ # Answer retrieval function
64
+ def ans_ret(self, inp):
65
+ if self.model_name == 'Flant5':
66
+ my_question = "What is KUET?"
67
+ data = self.retriever.invoke(inp)
68
+ context = ""
69
+ for x in data[:2]:
70
+ context += (x.page_content) + "\n"
71
+ inputs = f"""Please answer to this question using this context:\n{context}\n{my_question}"""
72
+ inputs = self.tokenizer(inputs, return_tensors="pt")
73
+ outputs = self.model.generate(**inputs)
74
+ answer = self.tokenizer.decode(outputs[0])
75
+ from textwrap import fill
76
+ ans = fill(answer, width=100)
77
+ return ans
78
+
79
+ ans = self.rag_chain.invoke(inp)
80
+ ans = ans.split("Answer:")[1]
81
+ return ans
model_ret.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from time import time
4
+ from torch import cuda, bfloat16
5
+ from langchain import HuggingFacePipeline
6
+ from transformers import (AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, AutoConfig,
7
+ T5ForConditionalGeneration, pipeline, BitsAndBytesConfig)
8
+
9
+ def load_model_and_pipeline(model_id, temperature=0):
10
+ device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
11
+
12
+ bnb_config = BitsAndBytesConfig(
13
+ load_in_4bit=True,
14
+ bnb_4bit_quant_type='nf4',
15
+ bnb_4bit_use_double_quant=True,
16
+ bnb_4bit_compute_dtype=bfloat16
17
+ )
18
+
19
+ time_1 = time()
20
+ model_config = AutoConfig.from_pretrained(
21
+ model_id,
22
+ )
23
+
24
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=True)
25
+ model = AutoModelForCausalLM.from_pretrained(
26
+ model_id,
27
+ trust_remote_code=True,
28
+ config=model_config,
29
+ quantization_config=bnb_config,
30
+ device_map='auto',
31
+ )
32
+
33
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
34
+ time_2 = time()
35
+ print(f"Prepare model, tokenizer: {round(time_2-time_1, 3)} sec.")
36
+ time_1 = time()
37
+
38
+ pipe = pipeline(
39
+ "text-generation",
40
+ model=model,
41
+ tokenizer=tokenizer,
42
+ dtype=torch.float16,
43
+ device_map="auto",
44
+ max_new_tokens=512,
45
+ do_sample=True,
46
+ top_k=30,
47
+ num_return_sequences=1,
48
+ eos_token_id=tokenizer.eos_token_id)
49
+ time_2 = time()
50
+ print(f"Prepare pipeline: {round(time_2-time_1, 3)} sec.")
51
+
52
+ llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': temperature})
53
+ return tokenizer, model, llm
54
+
55
+ import pandas as pd
56
+ from datasets import Dataset
57
+
58
+ def calculate_rag_metrics(model_ques_ans_gen, llm_model, embedding_model="BAAI/bge-base-en-v1.5"):
59
+ # Create a dictionary from the model_ques_ans_gen list
60
+ from ragas import evaluate
61
+ from ragas.metrics import faithfulness, answer_correctness,answer_similarity,answer_relevancy,context_recall, context_precision
62
+ # context = {'contexts':'NA'}
63
+ # print([item['question'] for item in model_ques_ans_gen])
64
+ data_samples = {
65
+ 'question': [item['question'] for item in model_ques_ans_gen],
66
+ 'answer': [item['answer'] for item in model_ques_ans_gen],
67
+ 'contexts': [[''] for item in model_ques_ans_gen],
68
+ 'reference': [item['ground_truths'] for item in model_ques_ans_gen]
69
+ }
70
+
71
+ # Convert the dictionary to a pandas DataFrame
72
+ rag_df = pd.DataFrame(data_samples)
73
+
74
+ # Convert the DataFrame to a HuggingFace Dataset
75
+ rag_eval_dataset = Dataset.from_pandas(rag_df)
76
+
77
+ # Define the list of metrics to calculate
78
+ metrics = [
79
+ answer_correctness, answer_similarity,
80
+ answer_relevancy, faithfulness,
81
+ context_recall, context_precision
82
+ ]
83
+
84
+ # Perform the evaluation using the provided LLM and embedding models
85
+ result = evaluate(
86
+ rag_eval_dataset,
87
+ metrics=metrics,
88
+ llm=llm_model,
89
+ embeddings=embedding_model
90
+ )
91
+ # result.to_pandas()
92
+ return result.to_pandas()
model_ret.py.old ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
2
+ from langchain import HuggingFacePipeline
3
+ from transformers import pipeline
4
+ import transformers
5
+ import torch
6
+ from transformers import T5Tokenizer
7
+ from transformers import T5ForConditionalGeneration
8
+ def zepyhr_model(model_info,quanization):
9
+ path=f"models/{model_info}"
10
+ tokenizer = AutoTokenizer.from_pretrained(path,
11
+ use_auth_token=True,)
12
+
13
+ if quanization=="8":
14
+ model = AutoModelForCausalLM.from_pretrained(path,
15
+ device_map='auto',
16
+ torch_dtype=torch.float16,
17
+ use_auth_token=True,
18
+ load_in_8bit=True,
19
+ )
20
+ else:
21
+ model = AutoModelForCausalLM.from_pretrained(path,
22
+ device_map='auto',
23
+ torch_dtype=torch.float16,
24
+ use_auth_token=True,
25
+ load_in_4bit=True
26
+ )
27
+
28
+ pipe = pipeline("text-generation",
29
+ model=model,
30
+ tokenizer= tokenizer,
31
+ torch_dtype=torch.bfloat16,
32
+ device_map="auto",
33
+ max_new_tokens = 512,
34
+ do_sample=True,
35
+ top_k=30,
36
+ num_return_sequences=1,
37
+ eos_token_id=tokenizer.eos_token_id
38
+ )
39
+
40
+ llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})
41
+ return llm
42
+ def llama_model(model_info,quanization):
43
+ path=f"models/{model_info}"
44
+ tokenizer = AutoTokenizer.from_pretrained(path,
45
+ use_auth_token=True,)
46
+
47
+ if quanization=="8":
48
+ model = AutoModelForCausalLM.from_pretrained(path,
49
+ device_map='auto',
50
+ torch_dtype=torch.float16,
51
+ use_auth_token=True,
52
+ load_in_8bit=True,
53
+ )
54
+ else:
55
+ model = AutoModelForCausalLM.from_pretrained(path,
56
+ device_map='auto',
57
+ torch_dtype=torch.float16,
58
+ use_auth_token=True,
59
+ load_in_4bit=True
60
+ )
61
+ pipe = pipeline("text-generation",
62
+ model=model,
63
+ tokenizer= tokenizer,
64
+ torch_dtype=torch.bfloat16,
65
+ device_map="auto",
66
+ max_new_tokens = 512,
67
+ do_sample=True,
68
+ top_k=30,
69
+ num_return_sequences=1,
70
+ eos_token_id=tokenizer.eos_token_id
71
+ )
72
+ llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})
73
+ return llm
74
+
75
+ def mistral_model(model_info,quanization):
76
+ path=f"models/{model_info}"
77
+ tokenizer = AutoTokenizer.from_pretrained(path,
78
+ use_auth_token=True,)
79
+
80
+ if quanization=="8":
81
+ model = AutoModelForCausalLM.from_pretrained(path,
82
+ device_map='auto',
83
+ torch_dtype=torch.float16,
84
+ use_auth_token=True,
85
+ load_in_8bit=True,
86
+ )
87
+ else:
88
+ model = AutoModelForCausalLM.from_pretrained(path,
89
+ device_map='auto',
90
+ torch_dtype=torch.float16,
91
+ use_auth_token=True,
92
+ load_in_4bit=True
93
+ )
94
+
95
+
96
+
97
+ pipe = pipeline("text-generation",
98
+ model=model,
99
+ tokenizer= tokenizer,
100
+ torch_dtype=torch.bfloat16,
101
+ device_map="auto",
102
+ max_new_tokens = 512,
103
+ do_sample=True,
104
+ top_k=30,
105
+ num_return_sequences=1,
106
+ eos_token_id=tokenizer.eos_token_id
107
+ )
108
+
109
+ llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})
110
+ return llm
111
+
112
+ def phi_model(model_info,quanization):
113
+ path=f"models/{model_info}"
114
+ tokenizer = AutoTokenizer.from_pretrained(path,
115
+ use_auth_token=True,)
116
+
117
+ if quanization=="8":
118
+ model = AutoModelForCausalLM.from_pretrained(path,
119
+ device_map='auto',
120
+ torch_dtype=torch.float16,
121
+ use_auth_token=True,
122
+ load_in_8bit=True,
123
+ trust_remote_code=True
124
+ )
125
+ else:
126
+ model = AutoModelForCausalLM.from_pretrained(path,
127
+ device_map='auto',
128
+ torch_dtype=torch.float16,
129
+ use_auth_token=True,
130
+ load_in_4bit=True,
131
+ trust_remote_code=True
132
+ )
133
+
134
+
135
+
136
+ pipe = pipeline("text-generation",
137
+ model=model,
138
+ tokenizer= tokenizer,
139
+ torch_dtype=torch.bfloat16,
140
+ device_map="auto",
141
+ max_new_tokens = 512,
142
+ do_sample=True,
143
+ top_k=30,
144
+ num_return_sequences=1,
145
+ eos_token_id=tokenizer.eos_token_id
146
+ )
147
+
148
+ llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})
149
+ return llm
150
+
151
+ def flant5_model(model_info):
152
+ path=f"models/{model_info}"
153
+ model = T5ForConditionalGeneration.from_pretrained(path)
154
+ MODEL_NAME = "google/flan-t5-base"
155
+ tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
156
+ pipe = pipeline("text-generation",
157
+ model=model,
158
+ tokenizer= tokenizer,
159
+ torch_dtype=torch.bfloat16,
160
+ device_map="auto",
161
+ max_new_tokens = 512,
162
+ do_sample=True,
163
+ top_k=30,
164
+ num_return_sequences=1,
165
+ eos_token_id=tokenizer.eos_token_id
166
+ )
167
+
168
+ llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})
169
+ return tokenizer,model,llm
170
+
171
+ # model = AutoModelForCausalLM.from_pretrained(
172
+ # path,
173
+ # # quantization_config=bnb_config,
174
+ # device_map="auto",
175
+ # trust_remote_code=True,
176
+ # attn_implementation="flash_attention_2",
177
+ # torch_dtype=torch.bfloat16,
178
+
179
+ # )
models/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # ignore all files except .gitignore :
2
+ *
3
+ !.gitignore
params.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "M1", "embedding_name": "BAAI/bge-base-en-v1.5", "splitter_type_dropdown": "character", "chunk_size_slider": 500, "chunk_overlap_slider": 30, "separator_textbox": "\n", "max_tokens_slider": 1000}
requirements.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.44
2
+ langchain==0.3.27
3
+ langchain-community==0.3.31
4
+ llama-index
5
+ llama-index-core
6
+ fastapi==0.112.4
7
+ transformers==4.57.0
8
+ pynvml==12.0.0
9
+ datasets==4.0.0
10
+ openpyxl==3.1.5
11
+ trl==0.25.1
12
+ peft==0.17.1
13
+ bitsandbytes==0.48.2
14
+ docx2txt==0.9
15
+ torch
16
+ torchvision
17
+ torchaudio
18
+ jupyter==1.1.1
19
+ langchainhub==0.1.21
20
+ sentence-transformers==5.1.1
21
+ faiss-gpu-cu12==1.13.0
22
+ accelerate==1.10.1
23
+ ninja==1.13.0
24
+ wandb==0.22.2
25
+ docx==0.2.4
26
+ chromadb==1.3.5
27
+ pypdf==6.4.0
28
+ ragas==0.3.9
29
+ # flash-attn --no-build-isolation
score_report/2024_03_25_21_10_54model_ans_mistral_finetuned_486_colbert.xlsx ADDED
Binary file (21.9 kB). View file
 
score_report/2024_03_25_22_09_44model_ans_zepyhr_finetuned_486_colbert.xlsx ADDED
Binary file (18.4 kB). View file
 
score_report/2024_03_26_22_42_56model_ans_llama_finetuned_486_rag_colbert.xlsx ADDED
Binary file (13.2 kB). View file
 
test1.txt ADDED
File without changes
testing.ipynb ADDED
@@ -0,0 +1,712 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "def score_report_bar():\n",
10
+ " path=r\"C:\\Users\\Inception\\Desktop\\LLM-based-QA-chatbot-builder\\UI\\score_report\"\n",
11
+ " import os\n",
12
+ " import math\n",
13
+ " dat=[]\n",
14
+ " for x in os.listdir(path):\n",
15
+ " wh=[]\n",
16
+ " flag=0\n",
17
+ " for x2 in x:\n",
18
+ " if x2>='a' and x2<='z':\n",
19
+ " flag=1\n",
20
+ " wh.append(x2)\n",
21
+ " elif flag==1:\n",
22
+ " wh.append(\" \")\n",
23
+ " wh=''.join(wh)\n",
24
+ " wh=wh.replace(\"model ans\",\"\")\n",
25
+ " wh=wh.replace(\"finetuned\",\"\")\n",
26
+ " wh=wh.replace(\" \",\" \")\n",
27
+ " wh=wh.replace(\"xlsx\",\"\")\n",
28
+ " df_temp=pd.read_excel(os.path.join(path,x))\n",
29
+ " rating=sum(df_temp[\"rating\"])/len(df_temp)\n",
30
+ " dat.append({\n",
31
+ " \"Model Name\":wh,\n",
32
+ " \"Average Rating\":rating\n",
33
+ " })\n",
34
+ " temp=pd.DataFrame(dat)\n",
35
+ " return temp"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "import gradio as gr\n",
45
+ "import pandas as pd\n",
46
+ "\n",
47
+ "def bar_plot_fn():\n",
48
+ " temp=score_report_bar()\n",
49
+ " return gr.BarPlot(\n",
50
+ " temp,\n",
51
+ " x=\"Model Name\",\n",
52
+ " y=\"Average Rating\",\n",
53
+ " x_title=\"Model name\",\n",
54
+ " y_title=\"Average Rating\",\n",
55
+ " title=\"Simple Bar Plot with made up data\",\n",
56
+ " tooltip=[\"Model Name\", \"Average Rating\"],\n",
57
+ " y_lim=[1, 5],\n",
58
+ " width=200,\n",
59
+ " height=1000\n",
60
+ " )\n",
61
+ "with gr.Blocks() as bar_plot:\n",
62
+ " with gr.Row():\n",
63
+ " btn=gr.Button(\"test\")\n",
64
+ " with gr.Row():\n",
65
+ " plot = gr.BarPlot()\n",
66
+ " btn.click(bar_plot_fn, None, outputs=plot)\n",
67
+ "\n",
68
+ "bar_plot.launch()\t"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": null,
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "def parse_data(link,num=None): \n",
78
+ " from bs4 import BeautifulSoup\n",
79
+ " import requests\n",
80
+ " import re\n",
81
+ " from docx import Document \n",
82
+ " from langchain_community.document_loaders import WebBaseLoader\n",
83
+ " s=set()\n",
84
+ " import time\n",
85
+ " start_time = time.time()\n",
86
+ " duration = 5\n",
87
+ " def get_links(url):\n",
88
+ " response = requests.get(url)\n",
89
+ " data = response.text\n",
90
+ " soup = BeautifulSoup(data, 'lxml')\n",
91
+ "\n",
92
+ " links = []\n",
93
+ " for link in soup.find_all('a'):\n",
94
+ " link_url = link.get('href')\n",
95
+ " if link_url is not None and link_url.startswith('http'):\n",
96
+ " s.add(link_url)\n",
97
+ " links.append(link_url)\n",
98
+ " \n",
99
+ " return links\n",
100
+ " # def write_to_file(links):\n",
101
+ " # with open('data.txt', 'a') as f:\n",
102
+ " # f.writelines(links)\n",
103
+ " def get_all_links(url):\n",
104
+ " for link in get_links(url):\n",
105
+ " if (time.time() - start_time) >= duration:\n",
106
+ " return\n",
107
+ " get_all_links(link)\n",
108
+ "\n",
109
+ " def data_ret2(link):\n",
110
+ " loader = WebBaseLoader(f\"{link}\")\n",
111
+ " data = loader.load()\n",
112
+ " return data[0].page_content\n",
113
+ " # link = 'https://kuet.ac.bd'\n",
114
+ " s.add(link)\n",
115
+ " get_all_links(link)\n",
116
+ " li=list(s)\n",
117
+ " all_data=[]\n",
118
+ " if num==None:\n",
119
+ " num=len(li)\n",
120
+ " for idx,x in enumerate(li):\n",
121
+ " if idx==num:\n",
122
+ " break\n",
123
+ " try:\n",
124
+ " print(\"Link: \",x)\n",
125
+ " all_data.append(data_ret2(x))\n",
126
+ " except:\n",
127
+ " print(\"pass\")\n",
128
+ " continue\n",
129
+ " all_data2 = re.sub(r'\\n+', '\\n\\n', \"\\n\".join(all_data))\n",
130
+ " document = Document()\n",
131
+ " document.add_paragraph(all_data2)\n",
132
+ " document.save('docx_file.docx')"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 3,
138
+ "metadata": {},
139
+ "outputs": [
140
+ {
141
+ "name": "stdout",
142
+ "output_type": "stream",
143
+ "text": [
144
+ "Link: http://library.kuet.ac.bd/\n",
145
+ "Link: http://kuet.portal.gov.bd/site/page/84728d9d-6059-41c4-940c-0f75eacf7d4c/Quarterly--semiannual-monitoring--evaluation-reports\n",
146
+ "Link: https://kuet.ac.bd/index.php/welcome/shownews/943\n"
147
+ ]
148
+ }
149
+ ],
150
+ "source": [
151
+ "parse_data(\"https://kuet.ac.bd\")"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": null,
157
+ "metadata": {},
158
+ "outputs": [],
159
+ "source": [
160
+ "import os\n",
161
+ "os.getcwd()"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": null,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": []
170
+ },
171
+ {
172
+ "cell_type": "code",
173
+ "execution_count": null,
174
+ "metadata": {},
175
+ "outputs": [],
176
+ "source": [
177
+ "df_all=[]\n",
178
+ "for x in os.listdir(\"save_ques_ans\"):\n",
179
+ " path=os.path.join(\"save_ques_ans\",x)\n",
180
+ " df_all.append(pd.read_excel(path))\n",
181
+ "df=pd.concat(df_all,axis=0)"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": null,
187
+ "metadata": {},
188
+ "outputs": [],
189
+ "source": []
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": null,
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "df"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": null,
203
+ "metadata": {},
204
+ "outputs": [],
205
+ "source": []
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": null,
210
+ "metadata": {},
211
+ "outputs": [],
212
+ "source": [
213
+ "\n",
214
+ "doc=[]\n",
215
+ "for x in s:\n",
216
+ " print(x)\n",
217
+ " doc.extend(data_ret2(x))\n",
218
+ " "
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": null,
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": [
227
+ "doc"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": null,
233
+ "metadata": {},
234
+ "outputs": [],
235
+ "source": []
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": null,
240
+ "metadata": {},
241
+ "outputs": [],
242
+ "source": [
243
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
244
+ "if model_name==\"Mistral\":\n",
245
+ " path=\"models/full_KUET_LLM_mistral\"\n",
246
+ "elif model_name==\"Zepyhr\":\n",
247
+ " path=\"models/full_KUET_LLM_zepyhr\"\n",
248
+ "elif model_name==\"Llama2\":\n",
249
+ " path=\"models/full_KUET_LLM_llama\" \n",
250
+ "tokenizer = AutoTokenizer.from_pretrained(path)\n",
251
+ "model = AutoModelForCausalLM.from_pretrained(path,\n",
252
+ " device_map='auto',\n",
253
+ " torch_dtype=torch.float16,\n",
254
+ " use_auth_token=True,\n",
255
+ " load_in_8bit=True,\n",
256
+ " # load_in_4bit=True\n",
257
+ " )\n",
258
+ "model.push_to_hub(repo_id=f\"My_model_{model_name}\",token=hf)\n",
259
+ "tokenizer.push_to_hub(repo_id=f\"My_model_{model_name}\",token=hf)"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": 1,
265
+ "metadata": {},
266
+ "outputs": [
267
+ {
268
+ "data": {
269
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAIjCAYAAADvBuGTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABvPElEQVR4nO3de5zM9eLH8ffM2It1C8uuW+5ZtwjR6qKydkUuXdxzq5/SSdSeKA7WppLOIZWQDlIS6bCni1bbhi42jlsodFMK65Lsuu6une/vD2Yy9s7sfr8zXs/HYx+n+c53Z97fmTG8z+fz/XxthmEYAgAAAABcFrvZAQAAAADAH1CuAAAAAMALKFcAAAAA4AWUKwAAAADwAsoVAAAAAHgB5QoAAAAAvIByBQAAAABeQLkCAAAAAC+gXAEAAACAF1CuAABFYrPZNGnSJFOee82aNbLZbFqzZo0pzw8AQH4oVwDgg9544w3ZbLY8f77++muzI16WWbNm6Y033jA7hodbb71VzZo1MztGoWRnZ2vBggW69dZbValSJQUFBalOnToaOnSoNm7caHY8APBbpcwOAAC4dE8//bTq1q2bY3uDBg1MSOM9s2bNUmhoqIYMGeKx/ZZbbtHp06cVGBhoTjAfcPr0ad19991KTEzULbfconHjxqlSpUr65Zdf9O6772rhwoXau3evatasaXZUAPA7lCsA8GF33HGH2rRpY3aMEmO32xUcHGx2DEsbPXq0EhMT9eKLL+qxxx7zuC8uLk4vvviiV57H6XQqMzOT9wMALsC0QADwU1lZWapUqZKGDh2a47709HQFBwfriSeekCRlZmZq4sSJat26tSpUqKAyZcro5ptv1urVqwt8niFDhqhOnTo5tk+aNEk2m81j24IFC3T77beratWqCgoKUpMmTTR79myPferUqaNvv/1Wa9eudU9zvPXWWyXlfc7VsmXL1Lp1a5UuXVqhoaG67777tG/fvhw5y5Ytq3379qlnz54qW7asqlSpoieeeELZ2dkFHmdhzZo1S02bNlVQUJCqV6+uRx55RMeOHfPY54cfftA999yj8PBwBQcHq2bNmurbt6/S0tLc+yQlJemmm27SVVddpbJly6pRo0YaN25cvs/9+++/67XXXlOnTp1yFCtJcjgceuKJJ9yjVkV572w2m0aMGKG3337bfXwffPBBoT9jkpSRkaG4uDg1aNBAQUFBqlWrlsaMGaOMjIx8jwsAfAUjVwDgw9LS0nTkyBGPbTabTZUrV1ZAQIDuuusuLV++XK+99prHVLqEhARlZGSob9++ks79Q/jf//63+vXrp2HDhun48eOaN2+eYmJitGHDBrVs2dIreWfPnq2mTZuqe/fuKlWqlD744AP97W9/k9Pp1COPPCJJmjFjhh599FGVLVtW//jHPyRJYWFheT7mG2+8oaFDh+r666/XlClTdPDgQb300kv66quvtGXLFl111VXufbOzsxUTE6N27drpX//6lz799FNNmzZN9evX18MPP3zZxzdp0iTFx8crKipKDz/8sHbv3q3Zs2frf//7n7766isFBAQoMzNTMTExysjI0KOPPqrw8HDt27dPH374oY4dO6YKFSro22+/1Z133qlrr71WTz/9tIKCgvTjjz/qq6++yvf5P/74Y509e1YDBw687GPJzWeffaZ3331XI0aMUGhoqBo2bFjoz5jT6VT37t315Zdf6sEHH1Tjxo21fft2vfjii/r++++VkJBQLJkBoEQZAACfs2DBAkNSrj9BQUHu/VatWmVIMj744AOP3+/SpYtRr1499+2zZ88aGRkZHvv8+eefRlhYmHH//fd7bJdkxMXFuW8PHjzYqF27do6McXFxxsV/zZw6dSrHfjExMR5ZDMMwmjZtanTo0CHHvqtXrzYkGatXrzYMwzAyMzONqlWrGs2aNTNOnz7t3u/DDz80JBkTJ070yCnJePrppz0e87rrrjNat26d47ku
1qFDB6Np06Z53n/o0CEjMDDQiI6ONrKzs93bZ86caUgy5s+fbxiGYWzZssWQZCxbtizPx3rxxRcNScbhw4cLzHWhxx9/3JBkbNmypVD7F+W9k2TY7Xbj22+/9dhe2M/YW2+9ZdjtduOLL77w2G/OnDmGJOOrr74qVGYAsDKmBQKAD3v11VeVlJTk8fPxxx+777/99tsVGhqqpUuXurf9+eefSkpKUp8+fdzbHA6He9TB6XTq6NGjOnv2rNq0aaPNmzd7LW/p0qXd/+0adevQoYN+/vlnjylxhbVx40YdOnRIf/vb3zzO/enatasiIiL00Ucf5fid4cOHe9y++eab9fPPPxf5uS/26aefKjMzU4899pjs9r/+eh02bJjKly/vzlKhQgVJ0qpVq3Tq1KlcH8s12vbf//5XTqez0BnS09MlSeXKlbuUQyhQhw4d1KRJE49thf2MLVu2TI0bN1ZERISOHDni/rn99tslqVBTUAHA6pgWCAA+rG3btvkuaFGqVCndc889Wrx4sTIyMhQUFKTly5crKyvL4x++krRw4UJNmzZNu3btUlZWlnt7bqsRXqqvvvpKcXFxSklJyVEs0tLS3MWjsH799VdJUqNGjXLcFxERoS+//NJjW3BwsKpUqeKxrWLFivrzzz+L9LxFyRIYGKh69eq5769bt65iY2M1ffp0vf3227r55pvVvXt33Xfffe7j79Onj/7973/r//7v//TUU0+pY8eOuvvuu3Xvvfd6FLeLlS9fXpJ0/Pjxyz6e3OT2WSjsZ+yHH37Qzp07c7z+LocOHSqWzABQkhi5AgA/17dvXx0/ftw9ovXuu+8qIiJCLVq0cO+zaNEiDRkyRPXr19e8efOUmJiopKQk3X777QWOnFy88IHLxYtE/PTTT+rYsaOOHDmi6dOn66OPPlJSUpIef/xxSSrSCM2lcjgcxf4chTFt2jRt27ZN48aN0+nTpzVy5Eg1bdpUv//+u6RzI3yff/65Pv30Uw0cOFDbtm1Tnz591KlTp3wX34iIiJAkbd++vVA5CvveuVw48nihwnzGnE6nmjdvnmOk1fXzt7/9rVCZAcDKKFcA4OduueUWVatWTUuXLtWRI0f02Wef5Ri1eu+991SvXj0tX75cAwcOVExMjKKionTmzJkCH79ixYo5VsOT/hrJcfnggw+UkZGh999/Xw899JC6dOmiqKioXP/Bntc/+i9Wu3ZtSdLu3btz3Ld79273/SUhryyZmZnas2dPjizNmzfX+PHj9fnnn+uLL77Qvn37NGfOHPf9drtdHTt21PTp0/Xdd9/p2Wef1WeffZbv9Lk77rhDDodDixYtKlTmwr53BSnMZ6x+/fo6evSoOnbsqKioqBw/uY0+AoCvoVwBgJ+z2+2699579cEHH+itt97S2bNnc/zD1zWiYxiGe9v69euVkpJS4OPXr19faWlp2rZtm3vbgQMHtGLFigKfIy0tTQsWLMjxmGXKlMn1H/0Xa9OmjapWrao5c+Z4LOf98ccfa+fOneratWuBj+EtUVFRCgwM1Msvv+xxjPPmzVNaWpo7S3p6us6ePevxu82bN5fdbncfw9GjR3M8vmvFxvyWLa9Vq5aGDRumTz75RK+88kqO+51Op6ZNm+YeISvse1eQwnzGevfurX379un111/P8funT5/WyZMni/ScAGBFnHMFAD7s448/1q5du3Jsb9++verVq+e+3adPH73yyiuKi4tT8+bN1bhxY4/977zzTi1fvlx33XWXunbtqj179mjOnDlq0qSJTpw4kW+Gvn376sknn9Rdd92lkSNH6tSpU5o9e7auueYaj8UwoqOjFRgYqG7duumhhx7SiRMn9Prrr6tq1ao6cOCAx2O2bt1as2fP1jPPPKMGDRqoatWq7oUPLhQQEKCpU6dq6NCh6tChg/r16+deir1OnTruKYfecvjwYT3zzDM5ttetW1cDBgzQ2LFjFR8fr86dO6t79+7avXu3Zs2apeuvv173
3XefpHPLmY8YMUK9evXSNddco7Nnz+qtt96Sw+HQPffcI0l6+umn9fnnn6tr166qXbu2Dh06pFmzZqlmzZq66aab8s04bdo0/fTTTxo5cqSWL1+uO++8UxUrVtTevXu1bNky7dq1y708emHfu8Io6DM2cOBAvfvuuxo+fLhWr16tG2+8UdnZ2dq1a5feffddrVq16oq6IDYAP2XyaoUAgEuQ31LskowFCxZ47O90Oo1atWoZkoxnnnkmx+M5nU7jueeeM2rXrm0EBQUZ1113nfHhhx/mulS3LlqK3TAM45NPPjGaNWtmBAYGGo0aNTIWLVqU63Le77//vnHttdcawcHBRp06dYypU6ca8+fPNyQZe/bsce+XmppqdO3a1ShXrpwhyb0s+8VLsbssXbrUuO6664ygoCCjUqVKxoABA4zff//dY5/BgwcbZcqUyXHsueXMTYcOHfJ8vTt27Ojeb+bMmUZERIQREBBghIWFGQ8//LDx559/uu//+eefjfvvv9+oX7++ERwcbFSqVMm47bbbjE8//dS9T3JystGjRw+jevXqRmBgoFG9enWjX79+xvfff19gTsM4t7T+v//9b+Pmm282KlSoYAQEBBi1a9c2hg4dmmOZ9sK+d5KMRx55JM/nLOgzZhjnls6fOnWq0bRpUyMoKMioWLGi0bp1ayM+Pt5IS0sr1LEBgJXZDOOCuQsAAAAAgEvCOVcAAAAA4AWUKwAAAADwAsoVAAAAAHgB5QoAAAAAvIByBQAAAABeQLkCAAAAAC/gIsK5cDqd2r9/v8qVKyebzWZ2HAAAAAAmMQxDx48fV/Xq1WW35z82RbnKxf79+1WrVi2zYwAAAACwiN9++001a9bMdx/KVS7KlSsn6dwLWL58eVOzZGVl6ZNPPlF0dLQCAgJMzWK1PFbKYrU8ZPGNPFbKYrU8ZPGNPFbKYrU8VspitTxk8Y08VspitvT0dNWqVcvdEfJDucqFaypg+fLlLVGuQkJCVL58eUt8sK2Ux0pZrJaHLL6Rx0pZrJaHLL6Rx0pZrJbHSlmslocsvpHHSlmsojCnC7GgBQAAAAB4AeUKAAAAALyAcgUAAAAAXkC5AgAAAAAvoFwBAAAAgBdQrgAAAADACyhXAAAAAOAFlCsAAAAA8ALKFQAAAAB4AeUKAAAAALyAcgUAAAAAXkC5AgAAAAAvoFwBAAAAgBdQriws22lo/Z6j2nTEpvV7jirbaZgdCQAAAEAeSpkdALlL3HFA8R98pwNpZyQ59OYPG1WtQrDiujVR52bVzI4HAAAA4CKMXFlQ4o4DenjR5vPF6i+paWf08KLNStxxwKRkAAAAAPJCubKYbKeh+A++U24TAF3b4j/4jimCAAAAgMVQrixmw56jOUasLmRIOpB2Rhv2HC25UAAAAAAKRLmymEPH8y5Wl7IfAAAAgJJBubKYquWCvbofAAAAgJJBubKYtnUrqVqFYNnyuN8mqVqFYLWtW6kkYwEAAAAoAOXKYhx2m+K6NZGkHAXLdTuuWxM57HnVLwAAAABmoFxZUOdm1TT7vlYKr+A59a9y2UDNvq8V17kCAAAALIhyZVGdm1XTl0/erkX3t1F4aack6YnoRhQrAAAAwKIoVxbmsNvUrm4lRVx17vau1OOm5gEAAACQN8qVD6hR5twFg787kG5yEgAAAAB5oVz5gBoh58rVzgPpMgzD5DQAAAAAckO58gFhpaUAh03Hz5zV73+eNjsOAAAAgFxQrnxAKbtUv0pZSUwNBAAAAKyKcuUjGlcrJ+nc1EAAAAAA1kO58hGNwylXAAAAgJVRrnyEq1wxLRAAAACwJsqVj4g4X65+O3pa6WeyTE4DAAAA4GKUKx9xVUiAqlcIliTtOsDFhAEAAACroVz5ENfo1bv/+00pP/2hbCfXvAIAAACsopTZAVA4q749qA2/HJUkvbf5d723+XdVqxCsuG5N1LlZNZPTAQAAAGDkygd884dNjy75Ricysj22p6ad0cOL
NitxxwGTkgEAAABwoVxZXLbT0PJf7MptAqBrW/wH3zFFEAAAADAZ5criNv76p45l2vK835B0IO2MNuw5WnKhAAAAAORAubK4Q8czCrnfmWJOAgAAACA/lCuLq1ouqJD7BRdzEgAAAAD5oVxZXJvaFXVVoKG8JgbaJFWrEKy2dSuVZCwAAAAAF6FcWZzDbtPddZySlKNguW7HdWsihz3v87IAAAAAFD9LlKtXX31VderUUXBwsNq1a6cNGzbku/+yZcsUERGh4OBgNW/eXCtXrvS432az5frzz3/+szgPo9i0qGzolb4tFF7Bc+pfeIVgzb6vFde5AgAAACzA9HK1dOlSxcbGKi4uTps3b1aLFi0UExOjQ4cO5br/unXr1K9fPz3wwAPasmWLevbsqZ49e2rHjh3ufQ4cOODxM3/+fNlsNt1zzz0ldVheF9M0TF8+ebtm9GkhSQqw2/TFmNsoVgAAAIBFmF6upk+frmHDhmno0KFq0qSJ5syZo5CQEM2fPz/X/V966SV17txZo0ePVuPGjTV58mS1atVKM2fOdO8THh7u8fPf//5Xt912m+rVq1dSh1UsHHabul5bXXablOU0dPRkptmRAAAAAJxXyswnz8zM1KZNmzR27Fj3NrvdrqioKKWkpOT6OykpKYqNjfXYFhMTo4SEhFz3P3jwoD766CMtXLgwzxwZGRnKyPhryfP09HRJUlZWlrKysgp7OMXC9fwX5qhWIVj7jp3RnsPHVbG0w/Q8ZrFSFslaeciSNyvlsVIWyVp5yJI3K+WxUhbJWnmslEWyVh6y5M1KeayUxWxFeQ1shmEYxZglX/v371eNGjW0bt06RUZGurePGTNGa9eu1fr163P8TmBgoBYuXKh+/fq5t82aNUvx8fE6ePBgjv1feOEFPf/889q/f7+Cg3NfrnzSpEmKj4/PsX3x4sUKCQm5lEMrVq98a9eP6Xbd1yBb11cx7e0DAAAA/N6pU6fUv39/paWlqXz58vnua+rIVUmYP3++BgwYkGexkqSxY8d6jIalp6erVq1aio6OLvAFLG5ZWVlKSkpSp06dFBAQIEn6IuNb/bh5nypffY263Fbf9DxmsVIWq+Uhi2/ksVIWq+Uhi2/ksVIWq+WxUhar5SGLb+SxUhazuWa1FYap5So0NFQOhyPHiNPBgwcVHh6e6++Eh4cXev8vvvhCu3fv1tKlS/PNERQUpKCgnBfrDQgIsMyH6cIstSuXkSTtS8swLZ9VXxsrsFIesuTNSnmslEWyVh6y5M1KeayURbJWHitlkayVhyx5s1IeK2UxS1GO39QFLQIDA9W6dWslJye7tzmdTiUnJ3tME7xQZGSkx/6SlJSUlOv+8+bNU+vWrdWiRQvvBjfZ1ZXPTVX87egpk5MAAAAAcDF9WmBsbKwGDx6sNm3aqG3btpoxY4ZOnjypoUOHSpIGDRqkGjVqaMqUKZKkUaNGqUOHDpo2bZq6du2qJUuWaOPGjZo7d67H46anp2vZsmWaNm1aiR9TcatZkXIFAAAAWI3p5apPnz46fPiwJk6cqNTUVLVs2VKJiYkKCwuTJO3du1d2+18DbO3bt9fixYs1fvx4jRs3Tg0bNlRCQoKaNWvm8bhLliyRYRgeC1/4i6srnStXB9LPKPOsU4GlTF9RHwAAALjimV6uJGnEiBEaMWJErvetWbMmx7ZevXqpV69e+T7mgw8+qAcffNAb8SwntGygSgc4dDorW/uOnVbd0DJmRwIAAACueAx5+CCbzaZalUpLYmogAAAAYBWUKx9V6/x5V3spVwAAAIAlUK58VK3z51399iflCgAAALACypWPcpcrRq4AAAAAS6Bc+air3eXqtMlJAAAAAEiUK5/lWtCCc64AAAAAa6Bc+SjXghZpp7OUdjrL5DQAAAAAKFc+qkxQKVUuEyiJ864AAAAAK6Bc+TDXoha/s2IgAAAAYDrKlQ9zlSvOuwIAAADMR7nyYTUrBkuSPv/+iFJ++kPZTsPk
RAAAAMCVq5TZAXBpEncc0OL1v0mSvvzxiL788YiqVQhWXLcm6tysmsnpAAAAgCsPI1c+KHHHAT28aHOOVQJT087o4UWblbjjgEnJAAAAgCsX5crHZDsNxX/wnXKbAOjaFv/Bd0wRBAAAAEoY5crHbNhzVAfSzuR5vyHpQNoZbdhztORCAQAAAKBc+ZpDx/MuVpeyHwAAAADvoFz5mKrlgr26HwAAAADvoFz5mLZ1K6lahWDZ8rjfJqlahWC1rVupJGMBAAAAVzzKlY9x2G2K69ZEknIULNftuG5N5LDnVb8AAAAAFAfKlQ/q3KyaZt/XSuEVPKf+hVcI1uz7WnGdKwAAAMAEXETYR3VuVk2dmoSr/fPJOpieofjuTXTfDXUYsQIAAABMwsiVD3PYbSoTeK4fN65WgWIFAAAAmIhy5eNs5/uU0+CiwQAAAICZKFc+zjVa5XRSrgAAAAAzUa58nP380FU2I1cAAACAqShXPs41cpXNyBUAAABgKsqVj3NPC2TkCgAAADAV5crH2Wyuc65MDgIAAABc4ShXPs5xfrVAzrkCAAAAzEW58nGsFggAAABYA+XKx7FaIAAAAGANlCsf5ypXDFwBAAAA5qJc+TimBQIAAADWQLnycXaucwUAAABYAuXKx7FaIAAAAGANlCsf5zrnyqBcAQAAAKaiXPm4v6YFmhwEAAAAuMJRrnycg6XYAQAAAEugXPk4VgsEAAAArIFy5ePOD1zJycgVAAAAYCrKlY9zsBQ7AAAAYAmUKx/nOueKkSsAAADAXJQrH8dqgQAAAIA1UK58nJ1zrgAAAABLoFz5OFYLBAAAAKzB9HL16quvqk6dOgoODla7du20YcOGfPdftmyZIiIiFBwcrObNm2vlypU59tm5c6e6d++uChUqqEyZMrr++uu1d+/e4joEU9m5zhUAAABgCaaWq6VLlyo2NlZxcXHavHmzWrRooZiYGB06dCjX/detW6d+/frpgQce0JYtW9SzZ0/17NlTO3bscO/z008/6aabblJERITWrFmjbdu2acKECQoODi6pwypRjFwBAAAA1lDKzCefPn26hg0bpqFDh0qS5syZo48++kjz58/XU089lWP/l156SZ07d9bo0aMlSZMnT1ZSUpJmzpypOXPmSJL+8Y9/qEuXLnrhhRfcv1e/fv18c2RkZCgjI8N9Oz09XZKUlZWlrKysyzvIy+R6/rxyGOdHrLKys0ska0F5SpKVskjWykOWvFkpj5WySNbKQ5a8WSmPlbJI1spjpSyStfKQJW9WymOlLGYrymtgMwxz5pNlZmYqJCRE7733nnr27OnePnjwYB07dkz//e9/c/zO1VdfrdjYWD322GPubXFxcUpISNA333wjp9OpChUqaMyYMfryyy+1ZcsW1a1bV2PHjvV4jotNmjRJ8fHxObYvXrxYISEhl3OYxW75L3atPWBXVHWnutVmyUAAAADAm06dOqX+/fsrLS1N5cuXz3df00aujhw5ouzsbIWFhXlsDwsL065du3L9ndTU1Fz3T01NlSQdOnRIJ06c0PPPP69nnnlGU6dOVWJiou6++26tXr1aHTp0yPVxx44dq9jYWPft9PR01apVS9HR0QW+gMUtKytLSUlJ6tSpkwICAnLcvy1xt9Ye+FV16tVTl5hrTM9TkqyUxWp5yOIbeayUxWp5yOIbeayUxWp5rJTFannI4ht5rJTFbK5ZbYVh6rRAb3M6z43c9OjRQ48//rgkqWXLllq3bp3mzJmTZ7kKCgpSUFBQju0BAQGW+TDllaVUKcf5/7KVaFZfeG3MYqU8ZMmblfJYKYtkrTxkyZuV8lgpi2StPFbKIlkrD1nyZqU8VspilqIcv2kLWoSGhsrhcOjgwYMe2w8ePKjw8PBcfyc8PDzf/UNDQ1WqVCk1adLEY5/GjRv7/WqBrGcBAAAAmMu0chUYGKjWrVsrOTnZvc3pdCo5OVmRkZG5/k5kZKTH/pKUlJTk3j8wMFDXX3+9
du/e7bHP999/r9q1a3v5CKzB4S5XtCsAAADATKZOC4yNjdXgwYPVpk0btW3bVjNmzNDJkyfdqwcOGjRINWrU0JQpUyRJo0aNUocOHTRt2jR17dpVS5Ys0caNGzV37lz3Y44ePVp9+vTRLbfcottuu02JiYn64IMPtGbNGjMOsdjZzy/Fns3QFQAAAGAqU8tVnz59dPjwYU2cOFGpqalq2bKlEhMT3YtW7N27V3b7X4Nr7du31+LFizV+/HiNGzdODRs2VEJCgpo1a+be56677tKcOXM0ZcoUjRw5Uo0aNdJ//vMf3XTTTSV+fCXBwUWEAQAAAEswfUGLESNGaMSIEbnel9toU69evdSrV698H/P+++/X/fff7414lnd+4EomragPAAAA4DzTzrmCdzAtEAAAALAGypWPc7jLlclBAAAAgCsc5crHsVogAAAAYA2UKx93vltRrgAAAACTUa58nINzrgAAAABLoFz5OFe5YuQKAAAAMBflysfZbYxcAQAAAFZAufJxf5Urk4MAAAAAVzjKlY9znH8HuYgwAAAAYC7KlY9zj1xRrgAAAABTUa58HKsFAgAAANZAufJxdi4iDAAAAFgC5crH2V1LsbOgBQAAAGAqypWPc3DOFQAAAGAJlCsf51ot0Mk5VwAAAICpKFc+zsbIFQAAAGAJlCsf53AvaGFyEAAAAOAKR7nycQ73gha0KwAAAMBMlCsfZ+c6VwAAAIAlUK583PluxXWuAAAAAJNRrnycg4sIAwAAAJZAufJxTAsEAAAArIFy5ePcC1rQrQAAAABTUa58nOucK0auAAAAAHNRrnycnXOuAAAAAEugXPk4rnMFAAAAWAPlyse5Rq6yGbkCAAAATEW58nEO92qBJgcBAAAArnCUKx/nGrkyGLkCAAAATEW58nGO8+8g0wIBAAAAc1GufJz7nCsWtAAAAABMRbnycawWCAAAAFgD5crH/XWdK5ODAAAAAFc4ypWPs9tZih0AAACwAsqVj3PYmBYIAAAAWAHlysfZWS0QAAAAsATKlY/76zpXXOsKAAAAMBPlyse5pgVKLGoBAAAAmIly5eNcC1pIXOsKAAAAMBPlysc57BeOXFGuAAAAALNQrnzcBd2KcgUAAACYiHLl4+w2pgUCAAAAVkC58nEe0wKdJgYBAAAArnCUKx934WqBXOsKAAAAMI8lytWrr76qOnXqKDg4WO3atdOGDRvy3X/ZsmWKiIhQcHCwmjdvrpUrV3rcP2TIENlsNo+fzp07F+chmOaCbsW0QAAAAMBEpperpUuXKjY2VnFxcdq8ebNatGihmJgYHTp0KNf9161bp379+umBBx7Qli1b1LNnT/Xs2VM7duzw2K9z5846cOCA++edd94picMpcTabzb2oBRcRBgAAAMxjermaPn26hg0bpqFDh6pJkyaaM2eOQkJCNH/+/Fz3f+mll9S5c2eNHj1ajRs31uTJk9WqVSvNnDnTY7+goCCFh4e7fypWrFgSh2MK13lXTAsEAAAAzFPKzCfPzMzUpk2bNHbsWPc2u92uqKgopaSk5Po7KSkpio2N9dgWExOjhIQEj21r1qxR1apVVbFiRd1+++165plnVLly5VwfMyMjQxkZGe7b6enpkqSsrCxlZWVdyqF5jev588txbsVAQxmZWcrKKt63tDB5SoqVskjWykOWvFkpj5WySNbKQ5a8WSmPlbJI1spjpSyStfKQJW9WymOlLGYrymtgM0ycS7Z//37VqFFD69atU2RkpHv7mDFjtHbtWq1fvz7H7wQGBmrhwoXq16+fe9usWbMUHx+vgwcPSpKWLFmikJAQ1a1bVz/99JPGjRunsmXLKiUlRQ6HI8djTpo0SfHx8Tm2L168WCEhId441GI1er1DmU6bJlx3VqHBZqcBAAAA/MepU6fUv39/paWlqXz58vnua+rIVXHp27ev+7+bN2+ua6+9VvXr19eaNWvUsWPHHPuPHTvWYzQsPT1dtWrVUnR0dIEv
YHHLyspSUlKSOnXqpICAgFz3Gbc5WZkZ2erQ4VbVrly8ZbAweUqKlbJYLQ9ZfCOPlbJYLQ9ZfCOPlbJYLY+VslgtD1l8I4+VspjNNautMEwtV6GhoXI4HO4RJ5eDBw8qPDw8198JDw8v0v6SVK9ePYWGhurHH3/MtVwFBQUpKCgox/aAgADLfJjyy+Jajt3mcJRYXl95bcxgpTxkyZuV8lgpi2StPGTJm5XyWCmLZK08VsoiWSsPWfJmpTxWymKWohy/qQtaBAYGqnXr1kpOTnZvczqdSk5O9pgmeKHIyEiP/SUpKSkpz/0l6ffff9cff/yhatWqeSe4xbgWtHCyFDsAAABgGtNXC4yNjdXrr7+uhQsXaufOnXr44Yd18uRJDR06VJI0aNAgjwUvRo0apcTERE2bNk27du3SpEmTtHHjRo0YMUKSdOLECY0ePVpff/21fvnlFyUnJ6tHjx5q0KCBYmJiTDnG4ma3sVogAAAAYDbTz7nq06ePDh8+rIkTJyo1NVUtW7ZUYmKiwsLCJEl79+6V3f5XB2zfvr0WL16s8ePHa9y4cWrYsKESEhLUrFkzSZLD4dC2bdu0cOFCHTt2TNWrV1d0dLQmT56c69Q/f2B3j1yZHAQAAAC4gpleriRpxIgR7pGni61ZsybHtl69eqlXr1657l+6dGmtWrXKm/Esz3XOlZORKwAAAMA0pk8LxOVzX0SYc64AAAAA01Cu/MD5gSvOuQIAAABMRLnyA66RKxOvBw0AAABc8ShXfsB1zlU2C1oAAAAApqFc+QE751wBAAAApqNc+YHz3YrVAgEAAAATUa78gJ2l2AEAAADTUa78AEuxAwAAAOajXPkBV7li5AoAAAAwD+XKD9hYLRAAAAAwXZHL1enTp3Xq1Cn37V9//VUzZszQJ5984tVgKDwHC1oAAAAApityuerRo4fefPNNSdKxY8fUrl07TZs2TT169NDs2bO9HhAFc08L5JwrAAAAwDRFLlebN2/WzTffLEl67733FBYWpl9//VVvvvmmXn75Za8HRMFcqwVmM3IFAAAAmKbI5erUqVMqV66cJOmTTz7R3XffLbvdrhtuuEG//vqr1wOiYO5yxcgVAAAAYJoil6sGDRooISFBv/32m1atWqXo6GhJ0qFDh1S+fHmvB0TBXNMCGbgCAAAAzFPkcjVx4kQ98cQTqlOnjtq1a6fIyEhJ50axrrvuOq8HRMHsXOcKAAAAMF2pov7Cvffeq5tuukkHDhxQixYt3Ns7duyou+66y6vhUDiu1QI55woAAAAwT5HLlSSFh4crPDxckpSenq7PPvtMjRo1UkREhFfDoXBc51yxWiAAAABgniJPC+zdu7dmzpwp6dw1r9q0aaPevXvr2muv1X/+8x+vB0TBXNMC6VYAAACAeYpcrj7//HP3UuwrVqyQYRg6duyYXn75ZT3zzDNeD4iCOViKHQAAADBdkctVWlqaKlWqJElKTEzUPffco5CQEHXt2lU//PCD1wOiYFxEGAAAADBfkctVrVq1lJKSopMnTyoxMdG9FPuff/6p4OBgrwdEwWyuBS0oVwAAAIBpirygxWOPPaYBAwaobNmyql27tm699VZJ56YLNm/e3Nv5UAjukSumBQIAAACmKXK5+tvf/qa2bdvqt99+U6dOnWS3nxv8qlevHudcmcR1zhXlCgAAADDPJS3F3qZNG7Vp00aGYcgwDNlsNnXt2tXb2VBIf11E2OQgAAAAwBWsyOdcSdKbb76p5s2bq3Tp0ipdurSuvfZavfXWW97OhkJi5AoAAAAwX5FHrqZPn64JEyZoxIgRuvHGGyVJX375pYYPH64jR47o8ccf93pI5O/8zEwWtAAAAABMVORy9corr2j27NkaNGiQe1v37t3VtGlTTZo0iXJlAjsjVwAAAIDpijwt8MCBA2rfvn2O7e3bt9eBAwe8EgpFw3WuAAAAAPMVuVw1aNBA7777bo7tS5cuVcOGDb0SCkXjGrnKZuQK
AAAAME2RpwXGx8erT58++vzzz93nXH311VdKTk7OtXSh+LnLFasFAgAAAKYp8sjVPffco/Xr1ys0NFQJCQlKSEhQaGioNmzYoLvuuqs4MqIAjvPvosHIFQAAAGCaS7rOVevWrbVo0SKPbYcOHdJzzz2ncePGeSUYCu+v61xRrgAAAACzXNJ1rnJz4MABTZgwwVsPhyJwcM4VAAAAYDqvlSuYx70UOyNXAAAAgGkoV37ANS2QbgUAAACYh3LlB5gWCAAAAJiv0AtaxMbG5nv/4cOHLzsMLo1rtUCmBQIAAADmKXS52rJlS4H73HLLLZcVBpfGZmO1QAAAAMBshS5Xq1evLs4cuAwOzrkCAAAATMc5V37Adc6Vk3OuAAAAANNQrvwAFxEGAAAAzEe58gPnuxWrBQIAAAAmolz5Adc5VwblCgAAADAN5coP2FktEAAAADDdJZWrY8eO6ZNPPtGiRYv05ptvevxcildffVV16tRRcHCw2rVrpw0bNuS7/7JlyxQREaHg4GA1b95cK1euzHPf4cOHy2azacaMGZeUzRc43OdcmRwEAAAAuIIVeil2lw8++EADBgzQiRMnVL58efc1lqRz11saNGhQkR5v6dKlio2N1Zw5c9SuXTvNmDFDMTEx2r17t6pWrZpj/3Xr1qlfv36aMmWK7rzzTi1evFg9e/bU5s2b1axZM499V6xYoa+//lrVq1cv6mH6FNc5V6wWCAAAAJinyCNXf//733X//ffrxIkTOnbsmP7880/3z9GjR4scYPr06Ro2bJiGDh2qJk2aaM6cOQoJCdH8+fNz3f+ll15S586dNXr0aDVu3FiTJ09Wq1atNHPmTI/99u3bp0cffVRvv/22AgICipzLl9hZih0AAAAwXZFHrvbt26eRI0cqJCTksp88MzNTmzZt0tixY93b7Ha7oqKilJKSkuvvpKSkKDY21mNbTEyMEhIS3LedTqcGDhyo0aNHq2nTpgXmyMjIUEZGhvt2enq6JCkrK0tZWVlFOSSvcz1/vjmMc/MBz2Y7iz1vofKUECtlkayVhyx5s1IeK2WRrJWHLHmzUh4rZZGslcdKWSRr5SFL3qyUx0pZzFaU18BmFHGJubvvvlt9+/ZV7969ixzsYvv371eNGjW0bt06RUZGurePGTNGa9eu1fr163P8TmBgoBYuXKh+/fq5t82aNUvx8fE6ePCgJGnKlClavXq1Vq1aJZvNpjp16uixxx7TY489lmuOSZMmKT4+Psf2xYsXe6VEFreNh21660eHrqng1CNNOPEKAAAA8JZTp06pf//+SktLU/ny5fPdt8gjV127dtXo0aP13XffqXnz5jmm3HXv3r2oD+lVmzZt0ksvvaTNmzd7nA+Wn7Fjx3qMhqWnp6tWrVqKjo4u8AUsbllZWUpKSlKnTp3ynN7o3HZAb/24XRUrVVaXLtebnqekWCmL1fKQxTfyWCmL1fKQxTfyWCmL1fJYKYvV8pDFN/JYKYvZXLPaCqPI5WrYsGGSpKeffjrHfTabTdnZ2YV+rNDQUDkcDveIk8vBgwcVHh6e6++Eh4fnu/8XX3yhQ4cO6eqrr3bfn52drb///e+aMWOGfvnllxyPGRQUpKCgoBzbAwICLPNhyi9L4PnthmwlltdXXhszWCkPWfJmpTxWyiJZKw9Z8malPFbKIlkrj5WySNbKQ5a8WSmPlbKYpSjHX+QFLZxOZ54/RSlW0rkpfq1bt1ZycrLH4ycnJ3tME7xQZGSkx/6SlJSU5N5/4MCB2rZtm7Zu3er+qV69ukaPHq1Vq1YV8Wh9g+P8u+jkOlcAAACAaYo8cuVtsbGxGjx4sNq0aaO2bdtqxowZOnnypIYOHSpJGjRokGrUqKEpU6ZIkkaNGqUOHTpo2rRp6tq1q5YsWaKNGzdq7ty5kqTKlSurcuXKHs8REBCg8PBwNWrUqGQProS4LyLMaoEAAACAaS6pXK1du1b/+te/tHPnTklSkyZNNHr0aN18881F
fqw+ffro8OHDmjhxolJTU9WyZUslJiYqLCxMkrR3717Z7X8NsLVv316LFy/W+PHjNW7cODVs2FAJCQk5rnF1JXEvxc7IFQAAAGCaIperRYsWaejQobr77rs1cuRISdJXX32ljh076o033lD//v2LHGLEiBEaMWJErvetWbMmx7ZevXqpV69ehX783M6z8icOu+s6VyYHAQAAAK5gRS5Xzz77rF544QU9/vjj7m0jR47U9OnTNXny5EsqV7g89vPlKpt2BQAAAJimyAta/Pzzz+rWrVuO7d27d9eePXu8EgpF43BNC+ScKwAAAMA0RS5XtWrVyrFanyR9+umnqlWrlldCoWjOD1wxcgUAAACYqMjTAv/+979r5MiR2rp1q9q3by/p3DlXb7zxhl566SWvB0TB7HZGrgAAAACzFblcPfzwwwoPD9e0adP07rvvSpIaN26spUuXqkePHl4PiIKxoAUAAABgvktaiv2uu+7SXXfd5e0suETu61zRrgAAAADTFPmcK1gP51wBAAAA5ivUyFWlSpX0/fffKzQ0VBUrVpTt/EhJbo4ePeq1cCgcB+dcAQAAAKYrVLl68cUXVa5cOfd/51euUPLsLMUOAAAAmK5Q5Wrw4MHu/x4yZEhxZcElcrgvImxyEAAAAOAKVuRzrhwOhw4dOpRj+x9//CGHw+GVUCgaRq4AAAAA8xW5XBl5/AM+IyNDgYGBlx0IRec4/y6yoAUAAABgnkIvxf7yyy9Lkmw2m/7973+rbNmy7vuys7P1+eefKyIiwvsJUSBGrgAAAADzFbpcvfjii5LOjVzNmTPHYwpgYGCg6tSpozlz5ng/IQrkXi2QkSsAAADANIUuV3v27JEk3XbbbVq+fLkqVqxYbKFQNO6LCDNyBQAAAJim0OXKZfXq1cWRA5fB7h65MjkIAAAAcAUrcrmSpN9//13vv/++9u7dq8zMTI/7pk+f7pVgKDwH51wBAAAApityuUpOTlb37t1Vr1497dq1S82aNdMvv/wiwzDUqlWr4siIAthdqwVSrgAAAADTFHkp9rFjx+qJJ57Q9u3bFRwcrP/85z/67bff1KFDB/Xq1as4MqIArnOuDCPvpfIBAAAAFK8il6udO3dq0KBBkqRSpUrp9OnTKlu2rJ5++mlNnTrV6wFRMNe0QIlrXQEAAABmKXK5KlOmjPs8q2rVqumnn35y33fkyBHvJUOhuRa0kCS6FQAAAGCOIp9zdcMNN+jLL79U48aN1aVLF/3973/X9u3btXz5ct1www3FkREFcHiUK9oVAAAAYIYil6vp06frxIkTkqT4+HidOHFCS5cuVcOGDVkp0CQXdCumBQIAAAAmKXK5qlevnvu/y5Qpozlz5ng1EIrOfuE5V4xcAQAAAKYo8jlXsJ4LpwUaXEgYAAAAMEWRR67sdrtsF4yUXCw7O/uyAqHoHIxcAQAAAKYrcrlasWKFx+2srCxt2bJFCxcuVHx8vNeCofAuXC2Qc64AAAAAcxS5XPXo0SPHtnvvvVdNmzbV0qVL9cADD3glGIrGbju3DDurBQIAAADm8No5VzfccIOSk5O99XAoItd5V5QrAAAAwBxeKVenT5/Wyy+/rBo1anjj4XAJXCsGMi0QAAAAMEeRpwVWrFjRY0ELwzB0/PhxhYSEaNGiRV4Nh8Jzj1yxWiAAAABgiiKXqxdffNGjXNntdlWpUkXt2rVTxYoVvRoOheceuWJaIAAAAGCKIperIUOGFEMMXC7XgoGccwUAAACYo1Dlatu2bYV+wGuvvfaSw+DS/TUtkHIFAAAAmKFQ5aply5ay2WwyChgVsdlsXETYJK5yxbRAAAAAwByFKld79uwp7hy4TDZWCwQAAABMVahyVbt27eLOgcvkOF+uGLgCAAAAzFHkBS1cvvvuO+3du1eZmZke27t3737ZoVB07mmBjFwBAAAApihyufr555911113afv27R7nYbmnpXHOlSns5y8HzTlXAAAAgDnsRf2FUaNG
qW7dujp06JBCQkL07bff6vPPP1ebNm20Zs2aYoiIwnBd54rVAgEAAABzFHnkKiUlRZ999plCQ0Nlt9tlt9t10003acqUKRo5cqS2bNlSHDlRAAcLWgAAAACmKvLIVXZ2tsqVKydJCg0N1f79+yWdW/Ri9+7d3k2HQrO7rnNFtwIAAABMUeSRq2bNmumbb75R3bp11a5dO73wwgsKDAzU3LlzVa9eveLIiEJwjVw5OecKAAAAMEWRy9X48eN18uRJSdLTTz+tO++8UzfffLMqV66spUuXej0gCud8t2JaIAAAAGCSIpermJgY9383aNBAu3bt0tGjR1WxYkX3ioEoee6l2Bm5AgAAAExR5HOuFi1a5B65cqlUqdJlFatXX31VderUUXBwsNq1a6cNGzbku/+yZcsUERGh4OBgNW/eXCtXrvS4f9KkSYqIiFCZMmVUsWJFRUVFaf369Zeczxe4ypVBuQIAAABMUeRy9fjjjyssLEz9+/fXypUrL/u6VkuXLlVsbKzi4uK0efNmtWjRQjExMTp06FCu+69bt079+vXTAw88oC1btqhnz57q2bOnduzY4d7nmmuu0cyZM7V9+3Z9+eWXqlOnjqKjo3X48OHLympldvdqgSYHAQAAAK5QRS5XBw4c0JIlS2Sz2dS7d29Vq1ZNjzzyiNatW3dJAaZPn65hw4Zp6NChatKkiebMmaOQkBDNnz8/1/1feuklde7cWaNHj1bjxo01efJktWrVSjNnznTv079/f0VFRalevXpq2rSppk+frvT0dG3btu2SMvoCO+dcAQAAAKYq8jlXpUqV0p133qk777xTp06d0ooVK7R48WLddtttqlmzpn766adCP1ZmZqY2bdqksWPHurfZ7XZFRUUpJSUl199JSUlRbGysx7aYmBglJCTk+Rxz585VhQoV1KJFi1z3ycjIUEZGhvt2enq6JCkrK0tZWVmFPp7i4Hr+gnK4ylVmMWcubJ6SYKUskrXykCVvVspjpSyStfKQJW9WymOlLJK18lgpi2StPGTJm5XyWCmL2YryGtiMyzxJ58iRI1qyZInmzJmjnTt3Fmma4P79+1WjRg2tW7dOkZGR7u1jxozR2rVrcz1PKjAwUAsXLlS/fv3c22bNmqX4+HgdPHjQve3DDz9U3759derUKVWrVk0JCQm6/vrrc80xadIkxcfH59i+ePFihYSEFPp4zPTKtw79mG7TkIbZui6U0SsAAADAG06dOqX+/fsrLS1N5cuXz3ffIo9cuZ5gxYoVevvtt5WcnKxatWqpX79+eu+99y4pcHG47bbbtHXrVh05ckSvv/66evfurfXr16tq1ao59h07dqzHaFh6erpq1aql6OjoAl/A4paVlaWkpCR16tRJAQEBee635OBG/Zh+VNe2bKku11YzPU9JsFIWq+Uhi2/ksVIWq+Uhi2/ksVIWq+WxUhar5SGLb+SxUhazuWa1FUaRy1Xfvn314YcfKiQkRL1799aECRM8Rp2KIjQ0VA6Hw2PESZIOHjyo8PDwXH8nPDy8UPuXKVNGDRo0UIMGDXTDDTeoYcOGmjdvnscURJegoCAFBQXl2B4QEGCZD1NBWRz2c6fP2ez2EsnsS69NSbNSHrLkzUp5rJRFslYesuTNSnmslEWyVh4rZZGslYcsebNSHitlMUtRjr/IC1o4HA69++67OnDggGbOnHnJxUo6N8WvdevWSk5Odm9zOp1KTk7O83EjIyM99pekpKSkAnM4nU6P86r8jd3OaoEAAACAmYo8cvX22297NUBsbKwGDx6sNm3aqG3btpoxY4ZOnjypoUOHSpIGDRqkGjVqaMqUKZKkUaNGqUOHDpo2bZq6du2qJUuWaOPGjZo7d64k6eTJk3r22WfVvXt3VatWTUeOHNGrr76qffv2qVevXl7NbiWO8wtaOLnOFQAAAGCKQo9cdenSRWlpae7bzz//vI4dO+a+/ccff6hJkyZFDtCnTx/961//0sSJE9WyZUtt
3bpViYmJCgsLkyTt3btXBw4ccO/fvn17LV68WHPnzlWLFi303nvvKSEhQc2aNZN0bmRt165duueee3TNNdeoW7du+uOPP/TFF1+oadOmRc7nK1wXEXayFDsAAABgikKPXK1atcpjWt1zzz2n3r1766qrrpIknT17Vrt3776kECNGjNCIESNyvW/NmjU5tvXq1SvPUajg4GAtX778knL4MpvrIsKMXAEAAACmKPTI1cUrtl/mCu7wMoeNkSsAAADATEVe0ALW5J4WSLcCAAAATFHocmWz2dxTzy7cBmv4a7VA2hUAAABghkKfc2UYhoYMGeK+HtSZM2c0fPhwlSlTRpL8eplzX2BntUAAAADAVIUuV4MHD/a4fd999+XYZ9CgQZefCJfEdc4VI1cAAACAOQpdrhYsWFCcOXCZ7JxzBQAAAJiKBS38hHu1QKYFAgAAAKagXPkJ+/l3kmmBAAAAgDkoV37CzjlXAAAAgKkoV37CdZ0rLu4MAAAAmINy5SfcI1eUKwAAAMAUlCs/8de0QJODAAAAAFcoypWfcJx/J1ktEAAAADAH5cpPuK9zxYIWAAAAgCkoV37CwTlXAAAAgKkoV37Cdc4VI1cAAACAOShXfsI1LZCRKwAAAMAclCs/4ZoWyMAVAAAAYA7KlZ9wrxZIuwIAAABMQbnyEzb3da4oVwAAAIAZKFd+wsE5VwAAAICpKFd+wsFqgQAAAICpKFd+wn0RYboVAAAAYArKlZ84362YFggAAACYhHLlJ1znXDEtEAAAADAH5cpP2FktEAAAADAV5cpPODjnCgAAADAV5cpPuFcL5JwrAAAAwBSUKz9hcy1owdAVAAAAYArKlZ/4a1og5QoAAAAwA+XKT1CuAAAAAHNRrvwEqwUCAAAA5qJc+QlXuXI6TQ4CAAAAXKEoV37Ccf6dzGZaIAAAAGAKypWfsLMUOwAAAGAqypWfcC9owTlXAAAAgCkoV37CvaAFI1cAAACAKShXfsJud60WaHIQAAAA4ApFufITjvMjVwYjVwAAAIApKFd+wu5aLZBzrgAAAABTUK78BOdcAQAAAOaiXPkJVgsEAAAAzEW58hN/XefK5CAAAADAFYpy5Scc7tUCaVcAAACAGShXfuJ8t5KTc64AAAAAU1iiXL366quqU6eOgoOD1a5dO23YsCHf/ZctW6aIiAgFBwerefPmWrlypfu+rKwsPfnkk2revLnKlCmj6tWra9CgQdq/f39xH4ap3AtaMHIFAAAAmML0crV06VLFxsYqLi5OmzdvVosWLRQTE6NDhw7luv+6devUr18/PfDAA9qyZYt69uypnj17aseOHZKkU6dOafPmzZowYYI2b96s5cuXa/fu3erevXtJHlaJcy9oQbcCAAAATGF6uZo+fbqGDRumoUOHqkmTJpozZ45CQkI0f/78XPd/6aWX1LlzZ40ePVqNGzfW5MmT1apVK82cOVOSVKFCBSUlJal3795q1KiRbrjhBs2cOVObNm3S3r17S/LQStRf5Yp2BQAAAJihlJlPnpmZqU2bNmns2LHubXa7XVFRUUpJScn1d1JSUhQbG+uxLSYmRgkJCXk+T1pammw2m6666qpc78/IyFBGRob7dnp6uqRzUwyzsrIKeTTFw/X8BeXIPnv23P86ncWaubB5SoKVskjWykOWvFkpj5WySNbKQ5a8WSmPlbJI1spjpSyStfKQJW9WymOlLGYrymtgMwzzhjr279+vGjVqaN26dYqMjHRvHzNmjNauXav169fn+J3AwEAtXLhQ/fr1c2+bNWuW4uPjdfDgwRz7nzlzRjfeeKMiIiL09ttv55pj0qRJio+Pz7F98eLFCgkJuZRDK3GHTkvPbi2lYIehqW2zzY4DAAAA+IVTp06pf//+SktLU/ny5fPd19SRq+KWlZWl3r17yzAMzZ49O8/9xo4d6zEalp6erlq1aik6OrrAF7C4ZWVlKSkpSZ06dVJAQECe+/169JSe3fqlHI5S6tIl
xvQ8JcFKWayWhyy+kcdKWayWhyy+kcdKWayWx0pZrJaHLL6Rx0pZzOaa1VYYppar0NBQORyOHCNOBw8eVHh4eK6/Ex4eXqj9XcXq119/1WeffZZvSQoKClJQUFCO7QEBAZb5MBWUJej8fdmGUSKZfem1KWlWykOWvFkpj5WySNbKQ5a8WSmPlbJI1spjpSyStfKQJW9WymOlLGYpyvGbuqBFYGCgWrdureTkZPc2p9Op5ORkj2mCF4qMjPTYX5KSkpI89ncVqx9++EGffvqpKleuXDwHYCF214IWTpODAAAAAFco06cFxsbGavDgwWrTpo3atm2rGTNm6OTJkxo6dKgkadCgQapRo4amTJkiSRo1apQ6dOigadOmqWvXrlqyZIk2btyouXPnSjpXrO69915t3rxZH374obKzs5WamipJqlSpkgIDA8050GLmcF3nitUCAQAAAFOYXq769Omjw4cPa+LEiUpNTVXLli2VmJiosLAwSdLevXtlt/81wNa+fXstXrxY48eP17hx49SwYUMlJCSoWbNmkqR9+/bp/ffflyS1bNnS47lWr16tW2+9tUSOq6S5XiKWYgcAAADMYXq5kqQRI0ZoxIgRud63Zs2aHNt69eqlXr165bp/nTp1ZOICiKZxjVwZhmQYhmznbwMAAAAoGaZfRBjeYb+gTGU7r7xyCQAAAJiNcuUnXAtaSJx3BQAAAJiBcuUnHBeUK1YMBAAAAEoe5cpPOC6YFsiiFgAAAEDJo1z5iQvXr2BaIAAAAFDyKFd+wnNaIOUKAAAAKGmUKz/hYLVAAAAAwFSUKz9x4WqBdCsAAACg5FGu/IirX7GgBQAAAFDyKFd+xHXeFdMCAQAAgJJHufIjdhvlCgAAADAL5cqPuEaumBUIAAAAlDzKlR9xj1zRrgAAAIASR7nyI64FLZgWCAAAAJQ8ypUfcU0LZLVAAAAAoORRrvwI5QoAAAAwD+XKj9hYLRAAAAAwDeXKjzjOlyun0+QgAAAAwBWIcuVH3BcRZlogAAAAUOIoV37Efv7d5JwrAAAAoORRrvyI3T0tkHIFAAAAlDTKlR9xsKAFAAAAYBrKlR+xc84VAAAAYBrKlR9xjVzRrQAAAICSR7nyI+6RK6YFAgAAACWOcuVHzncrpgUCAAAAJqBc+RHXda5YLRAAAAAoeZQrP+Jeip1uBQAAAJQ4ypUfcXDOFQAAAGAaypUfcZ1z5eScKwAAAKDEUa78iJ2LCAMAAACmoVz5EfeCFoxcAQAAACWOcuVHKFcAAACAeShXfsTmnhZochAAAADgCkS58iMO14IWnHMFAAAAlDjKlR9hWiAAAABgHsqVH3GvFki5AgAAAEoc5cqPuMoV0wIBAACAkke58iOuaYFc5woAAAAoeZQrP2J3lSu6FQAAAFDiKFd+xLVaoME5VwAAAECJo1z5EfeCFkwLBAAAAEoc5cqP/DUtkHIFAAAAlDTKlR9xsFogAAAAYBrKlR+xuy8ibHIQAAAA4Apkerl69dVXVadOHQUHB6tdu3basGFDvvsvW7ZMERERCg4OVvPmzbVy5UqP+5cvX67o6GhVrlxZNptNW7duLcb01nK+W3HOFQAAAGACU8vV0qVLFRsbq7i4OG3evFktWrRQTEyMDh06lOv+69atU79+/fTAAw9oy5Yt6tmzp3r27KkdO3a49zl58qRuuukmTZ06taQOwzIc7pEryhUAAABQ0kwtV9OnT9ewYcM0dOhQNWnSRHPmzFFISIjmz5+f6/4vvfSSOnfurNGjR6tx48aaPHmyWrVqpZkzZ7r3GThwoCZOnKioqKiSOgzLYLVAAAAAwDylzHrizMxMbdq0SWPHjnVvs9vtioqKUkpKSq6/k5KSotjYWI9tMTExSkhIuKwsGRkZysjIcN9OT0+XJGVlZSkrK+uyHvtyuZ6/MDlsOleqzmZnF1vuouQpblbKIlkrD1nyZqU8VsoiWSsPWfJmpTxWyiJZK4+VskjWykOWvFkp
j5WymK0or4HNMOmKs/v371eNGjW0bt06RUZGurePGTNGa9eu1fr163P8TmBgoBYuXKh+/fq5t82aNUvx8fE6ePCgx76//PKL6tatqy1btqhly5b5Zpk0aZLi4+NzbF+8eLFCQkKKeGTmSfjFrtUH7Lq9mlM96jjNjgMAAAD4vFOnTql///5KS0tT+fLl893XtJErKxk7dqzHiFh6erpq1aql6OjoAl/A4paVlaWkpCR16tRJAQEB+e67Y9X3Wn3gF9WuW1dd7mhkep7iZqUsVstDFt/IY6UsVstDFt/IY6UsVstjpSxWy0MW38hjpSxmc81qKwzTylVoaKgcDkeOEaeDBw8qPDw8198JDw8v0v6FFRQUpKCgoBzbAwICLPNhKkyWgFIOSZIhW7Hn9rXXpiRZKQ9Z8malPFbKIlkrD1nyZqU8VsoiWSuPlbJI1spDlrxZKY+VspilKMdv2oIWgYGBat26tZKTk93bnE6nkpOTPaYJXigyMtJjf0lKSkrKc/8rjWu1QJNmegIAAABXNFOnBcbGxmrw4MFq06aN2rZtqxkzZujkyZMaOnSoJGnQoEGqUaOGpkyZIkkaNWqUOnTooGnTpqlr165asmSJNm7cqLlz57of8+jRo9q7d6/2798vSdq9e7ekc6NelzvCZXU212qBlCsAAACgxJlarvr06aPDhw9r4sSJSk1NVcuWLZWYmKiwsDBJ0t69e2W3/zW41r59ey1evFjjx4/XuHHj1LBhQyUkJKhZs2bufd5//313OZOkvn37SpLi4uI0adKkkjkwkzjcS7GbHAQAAAC4Apm+oMWIESM0YsSIXO9bs2ZNjm29evVSr1698ny8IUOGaMiQIV5K51sc53uok+tcAQAAACXO1IsIw7vs58+5cjItEAAAAChxlCs/YuecKwAAAMA0lCs/4jrnimmBAAAAQMmjXPkR17TAbLoVAAAAUOIoV37Eca5bcc4VAAAAYALKlR9xL2jBtEAAAACgxFGu/Ih7QQvKFQAAAFDiKFd+xMFS7AAAAIBpKFd+xL1aIN0KAAAAKHGUKz9yvlsxLRAAAAAwAeXKjzAtEAAAADAP5cqPuMoVI1cAAABAyaNc+RG7jZErAAAAwCyUKz/iLldOk4MAAAAAVyDKlR9xnH83sxm5AgAAAEoc5cqPcBFhAAAAwDyUKz/iWtDCYOQKAAAAKHGUKz/iHrmiXAEAAAAljnLlR+zupdhNDgIAAABcgShXfsThXi2QkSsAAACgpFGu/Iid1QIBAAAA01Cu/AgXEQYAAADMQ7nyI67VApkWCAAAAJQ8ypUfYbVAAAAAwDyUKz/y18iVyUEAAACAKxDlyo84OOcKAAAAMA3lyo+c71bK5pwrAAAAoMRRrvyIe1ogI1cAAABAiaNc+RFXuWLkCgAAACh5lCs/8td1rkwOAgAAAFyBKFd+5PzAFde5AgAAAExAufIj7mmBnHMFAAAAlDjKlR9xX0SYkSsAAACgxFGu/Ihr5IqBKwAAAKDkUa78iHvkinYFAAAAlDjKlR+xn383mRYIAAAAlDzKlR9xnB+5klgxEAAAAChplCs/4jrnSpKcTA0EAAAAShTlyo/YLhi54rwrAAAAoGRRrvyIx8iV08QgAAAAwBWIcuVHHIxcAQAAAKahXPkRQ38VqvU//8GqgQAAAEAJKmV2AHhH4o4DmvT+d+7bDyzcqPDyQerX9mrVCS2jquWC1bZuJY+pg9lOQxv2HNWh42dyvR8AAABA4VGu/EDijgN6eNFmXTxOlZqeoRc//cF9+8Ky9cuRU3pnw16lpp/J9f6q5YLVunZFbfr1T3f5al27ojbsOapNR2yqvOeo2tarkuP+C29T1q4MF5b00DJBkk06ciIj189MZIOqfCZQoGynofV81xQL/k81AChelCsfl+00FP/BdzmKVW4uLlsF3W+3SRfOLPzrtkNv/rAxn/vPqVQmQHe1rKGoJuG5FrXLvV2UolcSt115Kv70hxylSnkU
DLOyFPdrk1tJv9DFn5nCFHhfeW1yK5JWfZ+8nccbx164z1ThvmuK83MVWiZIZ7PP+uT7VJg/r/m9doV5n331M8yf7ypePVZffW386c93cXzGrfI+VS3nW/9HkM0wzF/54NVXX9U///lPpaamqkWLFnrllVfUtm3bPPdftmyZJkyYoF9++UUNGzbU1KlT1aVLF/f9hmEoLi5Or7/+uo4dO6Ybb7xRs2fPVsOGDQuVJz09XRUqVFBaWprKly9/2cd3ObKysrRy5Up16dJFAQEBOe5P+ekP9Xv9axOSFU1B/zjyt9sXMjtLSR5rUZmdnff50m9fyEqfqeLOY/brXtyvrT8f+5X8vnOsed++kNlZOPa8b1erEKy4bk3UuVm13A+gmBWlG5i+oMXSpUsVGxuruLg4bd68WS1atFBMTIwOHTqU6/7r1q1Tv3799MADD2jLli3q2bOnevbsqR07drj3eeGFF/Tyyy9rzpw5Wr9+vcqUKaOYmBidOZP7/7vuyw4d941juvgPs7/ftlKWkjzWojI7O+/zpd8uqce+FL50rFb78+rPx34lv+8ca963rZSFY8/7dmraGT28aLMSdxyQ1Zk+ctWuXTtdf/31mjlzpiTJ6XSqVq1aevTRR/XUU0/l2L9Pnz46efKkPvzwQ/e2G264QS1bttScOXNkGIaqV6+uv//973riiSckSWlpaQoLC9Mbb7yhvn37FpiJkSsAAADAOmySwisE68snby/xKYJF6QamnnOVmZmpTZs2aezYse5tdrtdUVFRSklJyfV3UlJSFBsb67EtJiZGCQkJkqQ9e/YoNTVVUVFR7vsrVKigdu3aKSUlJddylZGRoYyMDPft9PR0SeeKTVZW1iUfnze4nj+vHNfVLKfw8kE6mJ5RqPOuAAAAAF9jSDqQdkYpPx5Su7qVSvS5i9IHTC1XR44cUXZ2tsLCwjy2h4WFadeuXbn+Tmpqaq77p6amuu93bctrn4tNmTJF8fHxObZ/8sknCgkJKdzBFLOkpKQ87+sSbtP8dNcMz5Jt8gAAAEBJ+eSL9fpjZ8kOKZw6darQ+7JaoKSxY8d6jIalp6erVq1aio6OtsS0wKSkJHXq1CnXaYGS1EVSq28P6pmVu5SanpHrPgAAAICvi765XYmPXLlmtRWGqeUqNDRUDodDBw8e9Nh+8OBBhYeH5/o74eHh+e7v+t+DBw+qWrVqHvu0bNky18cMCgpSUFBQju0BAQF5FpqSVlCWO1vW1B3X1nBfv6SgJbIBAAAAX+E658qMa2YWpQ+YulpgYGCgWrdureTkZPc2p9Op5ORkRUZG5vo7kZGRHvtL56bMufavW7euwsPDPfZJT0/X+vXr83xMf+Gw2xRZv7J6tKyhUVEN9dVTt+udYTfopb4t9XjUNQovH+yxf3j5ID0e1TDP+y/+3Bb1NsBnAt7Adw0AXNlcX/tx3ZpY/npXpk8LjI2N1eDBg9WmTRu1bdtWM2bM0MmTJzV06FBJ0qBBg1SjRg1NmTJFkjRq1Ch16NBB06ZNU9euXbVkyRJt3LhRc+fOlSTZbDY99thjeuaZZ9SwYUPVrVtXEyZMUPXq1dWzZ0+zDtMUrrLlMuL2Bu6RrdwuyHbx/bleUO7nw/rki/WKvrldnheUS/ouVQlb9+voyUz3Y5t9fQSuHVF8ty+8CGluFyV0fWYq1rpGSzfu8xhNNTs77/Ol375QcXymereuqWO/f5/vd01eo/S+dKxm/nktzGvnT8d+Jb/vHGvety9kdhaOPe/b4SZf56ooTC9Xffr00eHDhzVx4kSlpqaqZcuWSkxMdC9IsXfvXtntfw2wtW/fXosXL9b48eM1btw4NWzYUAkJCWrWrJl7nzFjxujkyZN68MEHdezYMd10001KTExUcHBwjue/klxctgpz/8W329WtpD92GmpXt5ICStlz3T+yfmX9o2uTAovaZV/FvBBFr0Svqn4+T9SNbeUoVapEr3Zv5mtT0FXTXZ+ZLrfV18io
RsX+uSip16YwV7e30vvkzTzeOPaCPlPO7LNauXJ3vt81LoX5P4Yu530+m31WyV9t8Ln3qTB/XvN77QrzPvvqZ5g/31W8eqy++tr4259vb3/GrfI+FebfGpZiIIe0tDRDkpGWlmZ2FCMzM9NISEgwMjMzzY5iGIa18lgpi2FYKw9Z8malPFbKYhjWykOWvFkpj5WyGIa18lgpi2FYKw9Z8malPFbKYraidANTz7kCAAAAAH9BuQIAAAAAL6BcAQAAAIAXUK4AAAAAwAsoVwAAAADgBZQrAAAAAPACyhUAAAAAeAHlCgAAAAC8gHIFAAAAAF5AuQIAAAAAL6BcAQAAAIAXUK4AAAAAwAsoVwAAAADgBaXMDmBFhmFIktLT001OImVlZenUqVNKT09XQECA2XEslcdKWayWhyy+kcdKWayWhyy+kcdKWayWx0pZrJaHLL6Rx0pZzObqBK6OkB/KVS6OHz8uSapVq5bJSQAAAABYwfHjx1WhQoV897EZhalgVxin06n9+/erXLlystlspmZJT09XrVq19Ntvv6l8+fKmZrFaHitlsVoesvhGHitlsVoesvhGHitlsVoeK2WxWh6y+EYeK2Uxm2EYOn78uKpXry67Pf+zqhi5yoXdblfNmjXNjuGhfPnylvpgWymPlbJI1spDlrxZKY+VskjWykOWvFkpj5WySNbKY6UskrXykCVvVspjpSxmKmjEyoUFLQAAAADACyhXAAAAAOAFlCuLCwoKUlxcnIKCgsyOIslaeayURbJWHrLkzUp5rJRFslYesuTNSnmslEWyVh4rZZGslYcsebNSHitl8SUsaAEAAAAAXsDIFQAAAAB4AeUKAAAAALyAcgUAAAAAXkC5AgAAAAAvoFxZ1Oeff65u3bqpevXqstlsSkhIsMxzG4ahiRMnqlq1aipdurSioqL0ww8/mJbn4MGDGjJkiKpXr66QkBB17ty52PJMmTJF119/vcqVK6eqVauqZ8+e2r17t8c+qampGjhwoMLDw1WmTBm1atVK//nPf7yeZfbs2br22mvdF/eLjIzUxx9/LEn65ZdfZLPZcv1ZtmyZ17Nc7Pnnn5fNZtNjjz3msT0lJUW33367ypQpo/Lly+uWW27R6dOnvf78kyZNynHcERER7vtvvfXWHPcPHz7c6zlc9u3bp/vuu0+VK1dW6dKl1bx5c23cuDHXfYcPHy6bzaYZM2YUS5Y6derk+rl45JFHJEkPPfSQ6tevr9KlS6tKlSrq0aOHdu3aVSxZsrOzNWHCBNWtW1elS5dW/fr1NXnyZLnWWcrKytKTTz6p5s2bq0yZMqpevboGDRqk/fv3F0ue48eP67HHHlPt2rVVunRptW/fXv/73//c9y9fvlzR0dGqXLmybDabtm7d6rXn9sb37tGjRzVgwACVL19eV111lR544AGdOHHCtDybN29Wp06ddNVVV6ly5cp68MEHLylPQVkK875483s5vzyF+cyuWbMmz+/nCz9vl5tFOvddGBERoTJlyqhixYqKiorS+vXrczzORx99pHbt2ql06dKqWLGievbsWaQchclyoby+53L7fnr++eeLnKUweYYMGZLjuTp37pzrY2VkZKhly5aX/Oe+MK/Nzp071b17d1WoUEFlypTR9ddfr71797rv9+Z3c0F58vp8/vOf/5R07t8YDzzwgMd3d1xcnDIzMy8pj7+hXFnUyZMn1aJFC7366quWe+4XXnhBL7/8subMmaP169erTJkyiomJ0ZkzZ0o8j2EY6tmzp37++Wf997//1ZYtW1S7dm1FRUXp5MmTXs+ydu1aPfLII/r666+VlJSkrKwsRUdHezzXoEGDtHv3br3//vvavn277r77bvXu3VtbtmzxapaaNWvq+eef16ZNm7Rx40bdfvvt6tGjh7799lvVqlVLBw4c8PiJj49X2bJldccdd3g1x8X+97//6bXXXtO1117rsT0lJUWdO3dWdHS0NmzYoP/9738aMWKE7Pbi
+Rpq2rSpx/F/+eWXHvcPGzbM4/4XXnihWHL8+eefuvHGGxUQEKCPP/5Y3333naZNm6aKFSvm2HfFihX6+uuvVb169WLJIp17fy487qSkJElSr169JEmtW7fWggULtHPnTq1atUqGYSg6OlrZ2dlezzJ16lTNnj1bM2fO1M6dOzV16lS98MILeuWVVyRJp06d0ubNmzVhwgRt3rxZy5cv1+7du9W9e3evZ5Gk//u//1NSUpLeeustbd++XdHR0YqKitK+ffsknfsuuummmzR16lSvP7c3vncHDBigb7/9VklJSfrwww/1+eef68EHHzQlz/79+xUVFaUGDRpo/fr1SkxM1LfffqshQ4Z4PUth3hdvfi/nl6cwn9n27dvn+H7+v//7P9WtW1dt2rTxWhZJuuaaazRz5kxt375dX375perUqaPo6GgdPnzYvc9//vMfDRw4UEOHDtU333yjr776Sv379y9SjsJkcSnoe+7pp5/2eG0effTRImcpbJ7OnTt7PNc777yT635jxoy5rO/lgrL89NNPuummmxQREaE1a9Zo27ZtmjBhgoKDg937ePO7uaA8F38+58+fL5vNpnvuuUeStGvXLjmdTr322mv69ttv9eKLL2rOnDkaN25ckbP4JQOWJ8lYsWKFJZ7b6XQa4eHhxj//+U/3tmPHjhlBQUHGO++8U+J5du/ebUgyduzY4d6WnZ1tVKlSxXj99deLPc+hQ4cMScbatWvd28qUKWO8+eabHvtVqlSpRPJUrFjR+Pe//53rfS1btjTuv//+Yn3+48ePGw0bNjSSkpKMDh06GKNGjXLf165dO2P8+PHF+vwucXFxRosWLfK8/+JsxenJJ580brrppgL3+/33340aNWoYO3bsMGrXrm28+OKLxR/OMIxRo0YZ9evXN5xOZ673f/PNN4Yk48cff/T6c3ft2jXHZ/Luu+82BgwYkOfvbNiwwZBk/Prrr17NcurUKcPhcBgffvihx/ZWrVoZ//jHPzy27dmzx5BkbNmyxasZXC7le/e7774zJBn/+9//3Pt8/PHHhs1mM/bt21fieV577TWjatWqRnZ2tnufbdu2GZKMH374wWtZLpTf+1Jc38uF+fu5oM9sZmamUaVKFePpp58u9ixpaWmGJOPTTz81DMMwsrKyjBo1auT594a3sxT0PVdc33255Rk8eLDRo0ePAn935cqVRkREhPHtt9965c99bln69Olj3HfffUV6HG99Nxfmc9OjRw/j9ttvz3efF154wahbt+5lZfEXjFyhSPbs2aPU1FRFRUW5t1WoUEHt2rVTSkpKiefJyMiQJI//d8dutysoKCjHSEVxSEtLkyRVqlTJva19+/ZaunSpjh49KqfTqSVLlujMmTO69dZbiy1Hdna2lixZopMnTyoyMjLH/Zs2bdLWrVv1wAMPFFsGSXrkkUfUtWtXj8+HJB06dEjr169X1apV1b59e4WFhalDhw7F+h798MMPql69uurVq6cBAwZ4TK+QpLfffluhoaFq1qyZxo4dq1OnThVLjvfff19t2rRRr169VLVqVV133XV6/fXXPfZxOp0aOHCgRo8eraZNmxZLjtxkZmZq0aJFuv/++2Wz2XLcf/LkSS1YsEB169ZVrVq1vP787du3V3Jysr7//ntJ0jfffKMvv/wy39HVtLQ02Ww2XXXVVV7NcvbsWWVnZ3t8l0hS6dKlS+S7JD+F+d5NSUnRVVdd5THyERUVJbvdnus0sOLOk5GRocDAQI+R6dKlS0uSKa+nGd/LLgV9Zt9//3398ccfGjp0aLHmyMzM1Ny5c1WhQgW1aNFC0rmpm/v27ZPdbtd1112natWq6Y477tCOHTu8/vyF/Z57/vnnVblyZV133XX65z//qbNnz3o9i8uaNWtUtWpVNWrUSA8//LD++OMPj/sPHjyoYcOG6a233lJISEixZHA6nfroo490zTXXKCYmRlWrVlW7du3ynVZZ3N/NFzp48KA++uijAv/9
kJaW5vFvoSsZ5QpFkpqaKkkKCwvz2B4WFua+ryRFRETo6quv1tixY/Xnn38qMzNTU6dO1e+//64DBw4U63M7nU499thjuvHGG9WsWTP39nfffVdZWVmqXLmygoKC9NBDD2nFihVq0KCB1zNs375dZcuWVVBQkIYPH64VK1aoSZMmOfabN2+eGjdurPbt23s9g8uSJUu0efNmTZkyJcd9P//8s6Rz8/+HDRumxMREtWrVSh07diyW8+PatWunN954Q4mJiZo9e7b27Nmjm2++WcePH5ck9e/fX4sWLdLq1as1duxYvfXWW7rvvvu8nkM6d+yzZ89Ww4YNtWrVKj388MMaOXKkFi5c6N5n6tSpKlWqlEaOHFksGfKSkJCgY8eO5ZiqNWvWLJUtW1Zly5bVxx9/rKSkJAUGBnr9+Z966in17dtXERERCggI0HXXXafHHntMAwYMyHX/M2fO6Mknn1S/fv1Uvnx5r2YpV66cIiMjNXnyZO3fv1/Z2dlatGiRUlJSiv27pCCF+d5NTU1V1apVPe4vVaqUKlWq5PXv5sLkuf3225Wamqp//vOfyszM1J9//qmnnnpKkkx5PUvye/lChfnMzps3TzExMapZs2axZPjwww9VtmxZBQcH68UXX1RSUpJCQ0MleX43jx8/Xh9++KEqVqyoW2+9VUePHvVqjsJ8z40cOVJLlizR6tWr9dBDD+m5557TmDFjvJrDpXPnznrzzTeVnJysqVOnau3atbrjjjvc0+wMw9CQIUM0fPjwIk/XLIpDhw7pxIkTev7559W5c2d98sknuuuuu3T33Xdr7dq1HvuW1HfzhRYuXKhy5crp7rvvznOfH3/8Ua+88ooeeuihYs3iM8weOkPBZKFpgV999ZUhydi/f7/Hfr169TJ69+5d4nkMwzA2btxotGjRwpBkOBwOIyYmxrjjjjuMzp07F2uW4cOHG7Vr1zZ+++03j+0jRoww2rZta3z66afG1q1bjUmTJhkVKlQwtm3b5vUMGRkZxg8//GBs3LjReOqpp4zQ0FDj22+/9djn1KlTRoUKFYx//etfXn9+l7179xpVq1Y1vvnmG/e2C6feuT43Y8eO9fi95s2bG0899VSx5XL5888/jfLly+c59SU5ObnYpr4FBAQYkZGRHtseffRR44YbbjAM49znNywszGPqVklNC4yOjjbuvPPOHNuPHTtmfP/998batWuNbt26Ga1atTJOnz7t9ed/5513jJo1axrvvPOOsW3bNuPNN980KlWqZLzxxhs59s3MzDS6detmXHfddUZaWprXsxiGYfz444/GLbfc4v4uuf76640BAwYYERERHvuV9LTAwnzvPvvss8Y111yT47GqVKlizJo1q8TzGIZhvP3220ZYWJjhcDiMwMBA44knnjDCwsKM559/3mtZLpTf+1Jc38v55SnMZ/a3334z7Ha78d57711WjvyynDhxwvjhhx+MlJQU4/777zfq1KljHDx40DCMc++RJOO1115z73/mzBkjNDTUmDNnjteyXOr33Lx584xSpUoZZ86cueQsueXJzU8//eQxZfKll14ybrzxRuPs2bOGYXjvz/3FWfbt22dIMvr16+exX7du3Yy+fft6bCuO7+aCXptGjRoZI0aMyPP+33//3ahfv77xwAMPXFYOf0K58gFWKleuL5+Lv1xuueUWY+TIkSWe50LHjh0zDh06ZBiGYbRt29b429/+Vmw5HnnkEaNmzZrGzz//7LH9xx9/zHEOmGEYRseOHY2HHnqo2PJc+DwPPvigx7Y333zTCAgIcL82xWHFihXuf5C6fiQZNpvNcDgc7tflrbfe8vi93r17G/379y+2XBdq06ZNnkXuxIkThiQjMTHR68979dVX5/hLZ9asWUb16tUNwzCMF1980f06Xfja2e12o3bt2l7P4/LLL78YdrvdSEhIyHe/jIwMIyQkxFi8eLHXM9SsWdOYOXOmx7bJkycbjRo18tiWmZlp9OzZ07j2
2muNI0eOeD3HxU6cOOEuDr179za6dOnicX9Jl6vCfO/OmzfPuOqqqzzuz8rKMhwOh7F8+fISz3Oh1NRU4/jx48aJEycMu91uvPvuu17LcqG83pfi/F7OK09hP7NPP/20UaVKFSMzM/OycuSX5WINGjQwnnvuOcMwDOOzzz4zJBlffPGFxz5t27Y1xo0b57Usl/o9t2PHDkOSsWvXrkvOkluevFxYKnv06GHY7fYcmR0OhzFo0CCvZcnIyDBKlSplTJ482WO/MWPGGO3bt8/zcbz13Zzfa/P5558bkoytW7fmev++ffuMhg0bGgMHDvQ4v/JKx7RAFEndunUVHh6u5ORk97b09HStX78+13N9SlKFChVUpUoV/fDDD9q4caN69Ojh9ecwDEMjRozQihUr9Nlnn6lu3boe97vO27l4BTyHwyGn0+n1PBdzOp3u89Bc5s2bp+7du6tKlSrF9rwdO3bU9u3btXXrVvdPmzZtNGDAAG3dulX16tVT9erVcyxb//3336t27drFlsvlxIkT+umnn1StWrVc73ctrZvX/ZfjxhtvzPe4Bw4cqG3btnm8dtWrV9fo0aO1atUqr+dxWbBggapWraquXbvmu59x7v+Ey/G58oZTp04V+GclKytLvXv31g8//KBPP/1UlStX9nqOi5UpU0bVqlXTn3/+qVWrVhXLd0lRFOZ7NzIyUseOHdOmTZvc+3z22WdyOp1q165diee5UFhYmMqWLaulS5cqODhYnTp18mqegpT093JhP7OGYWjBggUaNGiQAgICvJ4jLxf+PdG6dWsFBQV5fEdlZWXpl19+8ep386V+z23dulV2uz3HlNfi8Pvvv+uPP/5w/z3w8ssv65tvvnHnXblypSRp6dKlevbZZ732vIGBgbr++uuL/PdjcX43u8ybN0+tW7d2n6N3oX379unWW291r2JYXCv/+qJSZgdA7k6cOKEff/zRfXvPnj3aunWrKlWqpKuvvtrU537sscf0zDPPqGHDhqpbt64mTJig6tWrX9J1MbyRZ9myZapSpYquvvpqbd++XaNGjVLPnj0VHR3t9SyPPPKIFi9erP/+978qV66c+/yCChUqqHTp0oqIiFCDBg300EMP6V//+pcqV66shIQE99LI3jR27Fjdcccduvrqq3X8+HEtXrxYa9as8fiL6scff9Tnn3/u/kuhuJQrV87jvDPp3D9QK1eu7N4+evRoxcXFqUWLFmrZsqUWLlyoXbt26b333vN6nieeeELdunVT7dq1tX//fsXFxcnhcKhfv3766aeftHjxYnXp0kWVK1fWtm3b9Pjjj+uWW27JsXy8Nzz++ONq3769nnvuOfXu3VsbNmzQ3LlzNXfuXElS5cqVc/zjKyAgQOHh4WrUqJHX80jn/nG1YMECDR48WKVK/fXXwM8//6ylS5cqOjpaVapU0e+//67nn39epUuXVpcuXbyeo1u3bnr22Wd19dVXq2nTptqyZYumT5+u+++/X9K5f+Tde++92rx5sz788ENlZ2e7/8xVqlTJ6+cauJY3btSokX788UeNHj1aERER7oUGjh49qr1797qvWeT6x1B4eLjCw8Mv67kv93u3cePG6ty5s4YNG6Y5c+YoKytLI0aMUN++fS9pCWlv/D0wc+ZMtW/fXmXLllVSUpJGjx6t559/vsiLkRSUpaD3xdvfy/nlqVatWqE/s5999pn27Nmj//u//ytyhsJkqVy5sp599ll1795d1apV05EjR/Tqq69q37597ksvlC9fXsOHD1dcXJxq1aql2rVru69l5NrHG1muvvrqAr/nUlJStH79et12220qV66cUlJS9Pjjj+u+++7L9dIVl5OnUqVKio+P1z333KPw8HD99NNPGjNmjBo0aKCYmBhJyvFvrbJly0qS6tevX+Tz4wp6bUaPHq0+ffrolltu0W233abExER98MEHWrNmjSTvfzcX5t+Y6enpWrZsmaZNm5bj913Fqnbt2vrXv/7lsbT/
5X4X+gUTR82Qj9WrVxuScvwMHjzY9Od2Op3GhAkTjLCwMCMoKMjo2LGjsXv3btPyvPTSS0bNmjWNgIAA4+qrrzbGjx9vZGRkFEuW3HJIMhYsWODe5/vvvzfuvvtuo2rVqkZISIhx7bXX5lgC2Bvuv/9+o3bt2kZgYKBRpUoVo2PHjsYnn3zisc/YsWONWrVqmTJcn9ty51OmTDFq1qxphISEGJGRkTmmonhLnz59jGrVqhmBgYFGjRo1jD59+rjPp9q7d69xyy23GJUqVTKCgoKMBg0aGKNHjy6283gMwzA++OADo1mzZkZQUJARERFhzJ07N9/9i/ucq1WrVhmScvy53bdvn3HHHXcYVatWNQICAoyaNWsa/fv3v+wpOXlJT083Ro0aZVx99dVGcHCwUa9ePeMf//iH+8+va5pXbj+rV6/2ep6lS5ca9erVMwIDA43w8HDjkUceMY4dO+a+f8GCBblmiYuLu+zn9sb37h9//GH069fPKFu2rFG+fHlj6NChxvHjx03LM3DgQKNSpUpGYGDgZX0PFpSlMO+LN7+X88tTlM9sv3798p32dblZTp8+bdx1111G9erVjcDAQKNatWpG9+7djQ0bNng8RmZmpvH3v//dqFq1qlGuXDkjKioqxxTKy82Sm4u/5zZt2mS0a9fOqFChghEcHGw0btzYeO655y75fKv88pw6dcqIjo42qlSpYgQEBBi1a9c2hg0bZqSmpub5eJczHbgwr828efOMBg0aGMHBwUaLFi08pmx7+7u5MHlee+01o3Tp0h7fgS55/ZmjVpxjMwzDKLiCAQAAAADywwRJAAAAAPACyhUAAAAAeAHlCgAAAAC8gHIFAAAAAF5AuQIAAAAAL6BcAQAAAIAXUK4AAAAAwAsoVwAAAADgBZQrAAC8zGazKSEhwewYAIASRrkCAPiVIUOGyGaz5fjp3Lmz2dEAAH6ulNkBAADwts6dO2vBggUe24KCgkxKAwC4UjByBQDwO0FBQQoPD/f4qVixoqRzU/Zmz56tO+64Q6VLl1a9evX03nvvefz+9u3bdfvtt6t06dKqXLmyHnzwQZ04ccJjn/nz56tp06YKCgpStWrVNGLECI/7jxw5orvuukshISFq2LCh3n///eI9aACA6ShXAIArzoQJE3TPPffom2++0YABA9S3b1/t3LlTknTy5EnFxMSoYsWK+t///qdly5bp008/9ShPs2fP1iOPPKIHH3xQ27dv1/vvv68GDRp4PEd8fLx69+6tbdu2qUuXLhowYICOHj1aoscJAChZNsMwDLNDAADgLUOGDNGiRYsUHBzssX3cuHEaN26cbDabhg8frtmzZ7vvu+GGG9SqVSvNmjVLr7/+up588kn99ttvKlOmjCRp5cqV6tatm/bv36+wsDDVqFFDQ4cO1TPPPJNrBpvNpvHjx2vy5MmSzhW2smXL6uOPP+bcLwDwY5xzBQDwO7fddptHeZKkSpUquf87MjLS477IyEht3bpVkrRz5061aNHCXawk6cYbb5TT6dTu3btls9m0f/9+dezYMd8M1157rfu/y5Qpo/Lly+vQoUOXekgAAB9AuQIA+J0yZcrkmKbnLaVLly7UfgEBAR63bTabnE5ncUQCAFgE51wBAK44X3/9dY7bjRs3liQ1btxY33zzjU6ePOm+/6uvvpLdblejRo1Urlw51alTR8nJySWaGQBgfYxcAQD8TkZGhlJTUz22lSpVSqGhoZKkZcuWqU2bNrrpppv09ttva8OGDZo3b54kacCAAYqLi9PgwYM1adIkHT58WI8++qgGDhyosLAwSdKkSZM0fPhwVa1aVXfccYeOHz+ur776So8++mjJHigAwFIoVwAAv5OYmKhq1ap5bGvUqJF27dol6dxKfkuWLNHf/vY3VatWTe+8846aNGkiSQoJCdGqVas0atQoXX/99QoJCdE999yj6dOnux9r8ODBOnPmjF588UU98cQTCg0N1b333ltyBwgAsCRW
CwQAXFFsNptWrFihnj17mh0FAOBnOOcKAAAAALyAcgUAAAAAXsA5VwCAKwqz4QEAxYWRKwAAAADwAsoVAAAAAHgB5QoAAAAAvIByBQAAAABeQLkCAAAAAC+gXAEAAACAF1CuAAAAAMALKFcAAAAA4AX/D0Fg17dhZeDMAAAAAElFTkSuQmCC",
270
+ "text/plain": [
271
+ "<Figure size 1000x600 with 1 Axes>"
272
+ ]
273
+ },
274
+ "metadata": {},
275
+ "output_type": "display_data"
276
+ }
277
+ ],
278
+ "source": [
279
+ "import matplotlib.pyplot as plt\n",
280
+ "\n",
281
+ "# Merge all the evaluation loss lists\n",
282
+ "eval_loss_lists = [\n",
283
+ " [\n",
284
+ " 0.07517127692699432,\n",
285
+ " 0.07137121260166168,\n",
286
+ " 0.06598775833845139,\n",
287
+ " 0.0005441228277049959,\n",
288
+ " 0.0002996980620082468,\n",
289
+ " 0.00021371280308812857,\n",
290
+ " 0.00028233605553396046,\n",
291
+ " 9.069988300325349e-05,\n",
292
+ " 7.004399230936542e-05,\n",
293
+ " 9.137028973782435e-05,\n",
294
+ " 5.340397547115572e-05,\n",
295
+ " 5.0301870942348614e-05\n",
296
+ " ],\n",
297
+ " [\n",
298
+ " 1.597152731847018e-05,\n",
299
+ " 1.162805529020261e-05,\n",
300
+ " 9.043936188390944e-06,\n",
301
+ " 1.379685454594437e-05,\n",
302
+ " 5.367660833144328e-06,\n",
303
+ " 4.6886875679774676e-06,\n",
304
+ " 4.490133960644016e-06,\n",
305
+ " 6.136821866675746e-06,\n",
306
+ " 3.3243470625166083e-06,\n",
307
+ " 2.348009729757905e-06,\n",
308
+ " 2.1804094103572425e-06,\n",
309
+ " 1.958705070137512e-06\n",
310
+ " ],\n",
311
+ " [\n",
312
+ " 3.93469099435606e-06,\n",
313
+ " 1.65619246672577e-06,\n",
314
+ " 1.1269650030953926e-06,\n",
315
+ " 8.881219173417776e-07,\n",
316
+ " 1.3077693665763945e-06,\n",
317
+ " 7.212336186057655e-07,\n",
318
+ " 6.988730092416517e-07,\n",
319
+ " 5.00343162457284e-07,\n",
320
+ " 4.1343139400851214e-07,\n",
321
+ " 5.06081335061026e-07,\n",
322
+ " 7.039822662591178e-07,\n",
323
+ " 5.087575800644117e-07\n",
324
+ " ],\n",
325
+ " [\n",
326
+ " 5.1233128033345565e-06,\n",
327
+ " 1.3323343637239304e-06,\n",
328
+ " 1.1789074960688595e-06,\n",
329
+ " 1.0221098136753426e-06,\n",
330
+ " 1.4271246300268103e-06,\n",
331
+ " 1.0917949566646712e-06,\n",
332
+ " 1.8720394336924073e-06,\n",
333
+ " 0.00015229727432597429,\n",
334
+ " 0.00016713247168809175,\n",
335
+ " 7.280236604856327e-05,\n",
336
+ " 5.6143608162528835e-06,\n",
337
+ " 1.2813707144232467e-06\n",
338
+ " ],\n",
339
+ " [\n",
340
+ " 1.7742066802384215e-06,\n",
341
+ " 3.1642618978366954e-06,\n",
342
+ " 2.774180939013604e-05,\n",
343
+ " 7.504659606638597e-06,\n",
344
+ " 1.0794157105920021e-06,\n",
345
+ " 8.346623303623346e-07,\n",
346
+ " 1.572396286064759e-06,\n",
347
+ " 4.874376031693828e-07,\n",
348
+ " 6.269995651564386e-07,\n",
349
+ " 5.949763703938515e-07,\n",
350
+ " 5.836409968651424e-07,\n",
351
+ " 5.382337917581026e-07\n",
352
+ " ],\n",
353
+ " [\n",
354
+ " 1.3506955838238355e-05,\n",
355
+ " 2.3305697141040582e-06,\n",
356
+ " 2.193627324231784e-06,\n",
357
+ " 3.027681714229402e-07,\n",
358
+ " 4.6904440864636854e-07,\n",
359
+ " 4.6231170358623785e-07,\n",
360
+ " 2.520739883493661e-07,\n",
361
+ " 2.040175957063184e-07,\n",
362
+ " 1.8624521658239246e-07,\n",
363
+ " 4.635896289073571e-07,\n",
364
+ " 2.6239982275910734e-07,\n",
365
+ " 2.4372931761718064e-07\n",
366
+ " ],\n",
367
+ " [\n",
368
+ " 5.271021564112743e-06,\n",
369
+ " 3.550181190803414e-06,\n",
370
+ " 2.5201459266099846e-06,\n",
371
+ " 2.8312820177234244e-06,\n",
372
+ " 1.4717104477313114e-06,\n",
373
+ " 2.2729768716089893e-06,\n",
374
+ " 1.030095177156909e-06,\n",
375
+ " 1.0983015954479924e-06,\n",
376
+ " 8.350090752173855e-07,\n",
377
+ " 4.235817687003873e-05,\n",
378
+ " 0.00017692078836262226,\n",
379
+ " 5.840817902935669e-05\n",
380
+ " ],\n",
381
+ " [\n",
382
+ " 1.2606010386662092e-06,\n",
383
+ " 7.131714937713696e-06,\n",
384
+ " 8.305702976940665e-06,\n",
385
+ " 6.520267561427318e-07,\n",
386
+ " 1.0400606953453462e-07,\n",
387
+ " 1.2373440938517888e-07,\n",
388
+ " 1.2282114880690642e-07,\n",
389
+ " 1.4778217405364558e-07,\n",
390
+ " 1.125305075788674e-07,\n",
391
+ " 4.522570762333089e-08,\n",
392
+ " 2.48692485911306e-05,\n",
393
+ " 5.199101238417825e-08\n",
394
+ " ],\n",
395
+ " [\n",
396
+ " 1.329818132944638e-06,\n",
397
+ " 9.433363743482914e-07,\n",
398
+ " 8.183121735783061e-07,\n",
399
+ " 1.0200094493484357e-06,\n",
400
+ " 7.936826023069443e-07,\n",
401
+ " 7.760887115182413e-07,\n",
402
+ " 2.45380675778506e-07,\n",
403
+ " 0.0001625938602956012,\n",
404
+ " 1.0732967581361663e-07,\n",
405
+ " 1.0528655138841714e-06,\n",
406
+ " 9.632424280425766e-07,\n",
407
+ " 7.961476740092621e-07\n",
408
+ " ],\n",
409
+ " [\n",
410
+ " 4.5500939904741244e-07,\n",
411
+ " 7.533798793701862e-07,\n",
412
+ " 4.7130234293035755e-07,\n",
413
+ " 7.465733347089554e-07,\n",
414
+ " 9.549980859446805e-07,\n",
415
+ " 6.432795771615929e-07,\n",
416
+ " 6.765155831089942e-07,\n",
417
+ " 6.765155831089942e-07,\n",
418
+ " 5.451398692457587e-07,\n",
419
+ " 4.994994355911331e-07,\n",
420
+ " 5.466189918479358e-07,\n",
421
+ " 4.268927682460344e-07\n",
422
+ " ],\n",
423
+ " [\n",
424
+ " 2.63293713942403e-07,\n",
425
+ " 3.551216138930613e-07,\n",
426
+ " 2.3628319922863739e-07,\n",
427
+ " 9.180489541904535e-07,\n",
428
+ " 1.1080908279836876e-06,\n",
429
+ " 6.248191084523569e-07,\n",
430
+ " 8.346111712853599e-07,\n",
431
+ " 5.276984325064404e-07,\n",
432
+ " 3.681239491015731e-07,\n",
433
+ " 1.8970614235058747e-07,\n",
434
+ " 3.114948299298703e-07,\n",
435
+ " 2.9696289516323304e-07\n",
436
+ " ],\n",
437
+ " # [\n",
438
+ " # 2.38517332036281e-05,\n",
439
+ " # 3.9089650272217114e-07,\n",
440
+ " # 6.718229883517779e-08,\n",
441
+ " # 1.4773820566915674e-07,\n",
442
+ " # 5.8338137876035034e-08,\n",
443
+ " # 3.57102081238736e-08,\n",
444
+ " # 2.2298079329630127e-06,\n",
445
+ " # 3.583775196602801e-07,\n",
446
+ " # 9.418199908850511e-08,\n",
447
+ " # 1.338206288892252e-06,\n",
448
+ " # 3.194011810592201e-07,\n",
449
+ " # 2.245769792352803e-07\n",
450
+ " # ],\n",
451
+ " [\n",
452
+ " 2.3522443370893598e-06,\n",
453
+ " 1.1711344996001571e-06,\n",
454
+ " 1.1321773172312533e-06,\n",
455
+ " 5.756968448622501e-07,\n",
456
+ " 4.4675923049908306e-07,\n",
457
+ " 4.365276993212319e-07,\n",
458
+ " 5.525398591998965e-07,\n",
459
+ " 4.404951710057503e-07,\n",
460
+ " 4.4630780848819995e-07,\n",
461
+ " 4.764913796861947e-07,\n",
462
+ " 4.10373701242861e-07,\n",
463
+ " 3.762708331578324e-07\n",
464
+ " ],\n",
465
+ " [\n",
466
+ " 2.1882451051169483e-07,\n",
467
+ " 5.146034354197582e-08,\n",
468
+ " 3.1587944704369875e-08,\n",
469
+ " 1.122993165125763e-08,\n",
470
+ " 8.033423704034703e-09,\n",
471
+ " 7.330823059703562e-09,\n",
472
+ " 2.0332389993882316e-08,\n",
473
+ " 1.718821529550496e-08,\n",
474
+ " 1.5028433608677005e-08,\n",
475
+ " 3.9828059072988253e-08,\n",
476
+ " 2.8266715190738978e-08,\n",
477
+ " 2.1497044144780375e-08\n",
478
+ " ],\n",
479
+ " [\n",
480
+ " 1.4871952558337398e-08,\n",
481
+ " 1.2490186662716951e-08,\n",
482
+ " 1.213749456496771e-08,\n",
483
+ " 1.159435214503901e-08,\n",
484
+ " 1.1296255486570317e-08,\n",
485
+ " 1.1153668211250078e-08,\n",
486
+ " 1.3103758966792611e-08,\n",
487
+ " 1.2461796927709656e-08,\n",
488
+ " 1.2030940688134706e-08,\n",
489
+ " 1.306745200935211e-08,\n",
490
+ " 1.029541429886649e-08,\n",
491
+ " 9.854548288501519e-09\n",
492
+ " ]\n",
493
+ "]\n",
494
+ "\n",
495
+ "# Flatten the nested list\n",
496
+ "merged_list = [item for sublist in eval_loss_lists for item in sublist]\n",
497
+ "\n",
498
+ "# Number of epochs\n",
499
+ "epochs = 20\n",
500
+ "\n",
501
+ "# Plotting the evaluation loss curve\n",
502
+ "plt.figure(figsize=(10, 6))\n",
503
+ "plt.plot(range(1, len(merged_list) + 1), merged_list, marker='o')\n",
504
+ "plt.title('Evaluation Loss Curve')\n",
505
+ "plt.xlabel('Epoch')\n",
506
+ "plt.ylabel('Evaluation Loss')\n",
507
+ "plt.xticks(range(1, len(merged_list) + 1, len(merged_list) // epochs))\n",
508
+ "plt.grid(True)\n",
509
+ "plt.show()\n"
510
+ ]
511
+ },
512
+ {
513
+ "cell_type": "code",
514
+ "execution_count": 2,
515
+ "metadata": {},
516
+ "outputs": [
517
+ {
518
+ "data": {
519
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHHCAYAAACRAnNyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVVklEQVR4nO3deVxU5f4H8M+ZYWbYBmRfBBHcUHDfUjMzcUHTNE1TKpdbt1zS8npft7q31NK63X6Zt7pp3UrrkmmLWlYuuKRp7jsuuOEOIrJvA8w8vz9gJkdQGJjhzAyf9+s1L5kzhzPfL6B+OM9zniMJIQSIiIiI7JBC7gKIiIiI7oZBhYiIiOwWgwoRERHZLQYVIiIislsMKkRERGS3GFSIiIjIbjGoEBERkd1iUCEiIiK7xaBCREREdotBhaiBSJKEefPmyfLev/76KyRJwq+//irL+5P9kyQJM2bMkLsMoioYVKhRWb58OSRJuutjz549cpdYLx999BGWL18udxlmHnzwQcTGxspdhuzu9XP33HPPyV0ekd1ykbsAIjm8/vrriIyMrLK9ZcuWMlRjPR999BH8/f0xadIks+0PPPAAiouLoVar5SmMAAADBw7EU089VWV769atZaiGyDEwqFCjFB8fj27dusldRoNRKBRwdXWVu4xGr3Xr1njiiSfkLoPIoXDoh+gOZWVl8PX1xeTJk6u8lpeXB1dXV8yZMwcAUFpaitdeew1du3aFt7c3PDw80LdvX2zbtq3G95k0aRKaN29eZfu8efMgSZLZtmXLluGhhx5CYGAgNBoN2rVrhyVLlpjt07x5c5w4cQLbt283DSk8+OCDAO4+R+Xbb79F165d4ebmBn9/fzzxxBO4du1alTo9PT1x7do1jBw5Ep6enggICMCcOXOg1+tr7LO2PvroI8TExECj0SA0NBTTp09HTk6O2T5nz57F6NGjERwcDFdXV4SFheHxxx9Hbm6uaZ+kpCTcf//9aNKkCTw9PdGmTRu88sor93zv2NhY9O/fv8p2g8GApk2bYsyYMaZtK1euRNeuXaHVauHl5YX27dvj3//+d/2av41xqOzgwYPo3bs33NzcEBkZiaVLl1bZNyMjA3/6058QFBQEV1dXdOzYEV988UW1ffz73/9G+/bt4erqioCAAAwZMgQHDhyosu/atWsRGxsLjUaDmJgYbNiwwez1/Px8vPDCC2jevDk0Gg0CAwMxcOBAHDp0yGpfA6Lb8YwKNUq5ubnIzMw02yZJEvz8/KBSqTBq1CisXr0aH3/8sdlwydq1a6HT6fD4448DqAgun376KcaPH49nnnkG+fn5+OyzzzB48GDs27cPnTp1skq9S5YsQUxMDEaMGAEXFxesW7cO06ZNg8FgwPTp0wEAixcvxvPPPw9PT0/8/e9/BwAEBQXd9ZjLly/H5MmT0b17d7z11lu4ceMG/v3vf2PXrl04fPgwmjRpYtpXr9dj8ODB6NmzJ/7v//4PmzdvxrvvvosWLVpg6tSp9e5v3rx5mD9/PuLi4jB16lSkpKRgyZIl2L9/P3bt2gWVSoXS0lIMHjwYOp0Ozz//PIKDg3Ht2jX89NNPyMnJgbe3N06cOIGHH34YHTp0wOuvvw6NRoNz585h165d93z/cePGYd68eUhPT0dwcLBp+86dO3H9+nXT9zspKQnjx4/HgAED8PbbbwMATp06hV27dmHWrFk19llSUlLl5w4AvLy8zH7OsrOzMXToUIwdOxbjx4/HN998g6lTp0KtVmPKlCkAgOLiYjz44IM4d+4cZsyYgcjISHz77beYNGkScnJyzOr505/+hOXLlyM+Ph5PP/00ysvL8dtvv2HPnj1mZxZ37tyJ1atXY9q0adBqtXj//fcxevRoXL58GX5+fgCA5557Dt999x1mzJiBdu3a4datW9i5cydOnTqFLl261Pg1ILKYIGpEli1bJgBU+9BoNKb9Nm7cKACIdevWmX3+0KFD
RVRUlOl5eXm50Ol0ZvtkZ2eLoKAgMWXKFLPtAMTcuXNNzydOnCgiIiKq1Dh37lxx51/NoqKiKvsNHjzYrBYhhIiJiRH9+vWrsu+2bdsEALFt2zYhhBClpaUiMDBQxMbGiuLiYtN+P/30kwAgXnvtNbM6AYjXX3/d7JidO3cWXbt2rfJed+rXr5+IiYm56+sZGRlCrVaLQYMGCb1eb9r+4YcfCgDi888/F0IIcfjwYQFAfPvtt3c91nvvvScAiJs3b9ZY1+1SUlIEAPHBBx+YbZ82bZrw9PQ0ff1nzZolvLy8RHl5uUXHF0Lc9ecOgPj6669N+/Xr108AEO+++65pm06nE506dRKBgYGitLRUCCHE4sWLBQCRmJho2q+0tFT06tVLeHp6iry8PCGEEFu3bhUAxMyZM6vUZDAYzOpTq9Xi3Llzpm1Hjx6t8nXx9vYW06dPt7h/orri0A81Sv/5z3+QlJRk9li/fr3p9Yceegj+/v5YtWqVaVt2djaSkpIwbtw40zalUmn6TdhgMCArKwvl5eXo1q2bVU+Fu7m5mT42ng3q168fLly4YDbsUVsHDhxARkYGpk2bZjZ3ZdiwYYiOjsbPP/9c5XPuvDKlb9++uHDhgsXvfafNmzejtLQUL7zwAhSKP/5JeuaZZ+Dl5WWqxdvbGwCwceNGFBUVVXss41mgH374AQaDodY1tG7dGp06dTL7fuv1enz33XcYPny46evfpEkTFBYWIikpyaIejR555JEqP3dJSUlVhp1cXFzw7LPPmp6r1Wo8++yzyMjIwMGDBwEAv/zyC4KDgzF+/HjTfiqVCjNnzkRBQQG2b98OAPj+++8hSRLmzp1bpZ47hxjj4uLQokUL0/MOHTrAy8vL7PvcpEkT7N27F9evX6/T14DIUk4TVHbs2IHhw4cjNDQUkiRh7dq1Nn0/4zyC2x/R0dE2fU+ynh49eiAuLs7scft/Fi4uLhg9ejR++OEH6HQ6AMDq1atRVlZmFlQA4IsvvkCHDh3g6uoKPz8/BAQE4Oeff65TgLibXbt2IS4uDh4eHmjSpAkCAgJM8y7q8j6XLl0CALRp06bKa9HR0abXjYzzGm7n4+OD7Oxsi9+7trWo1WpERUWZXo+MjMTs2bPx6aefwt/fH4MHD8Z//vMfs/7HjRuHPn364Omnn0ZQUBAef/xxfPPNN7UKLePGjcOuXbtMc3R+/fVXZGRkmH2/p02bhtatWyM+Ph5hYWGYMmVKlTkc9xIWFlbl5y4uLq7KEF1oaCg8PDzMthmvDLp48SKAiq9bq1atzMIdALRt29b0OgCcP38eoaGh8PX1rbG+Zs2aVdl25/f5X//6F5KTkxEeHo4ePXpg3rx5VgmsRHfjNEGlsLAQHTt2xH/+858Ge8+YmBikpaWZHjt37myw9ybbe/zxx5Gfn2860/LNN98gOjoaHTt2NO2TmJiISZMmoUWLFvjss8+wYcMGJCUl4aGHHqrxP8c7f5s1unOC6vnz5zFgwABkZmZi0aJF+Pnnn5GUlIQXX3wRACw6c1BXSqXS5u9RG++++y6OHTuGV155BcXFxZg5cyZiYmJw9epVABVnnnbs2IHNmzfjySefxLFjxzBu3DgMHDiwxom/48aNgxAC3377LYCK77e3tzeGDBli2icwMBBHjhzBjz/+iBEjRmDbtm2Ij4/HxIkTbdd0A7rb91kIYfp47NixuHDhAj744AOEhobinXfeQUxMjNkZSSJrcpqgEh8fjwULFmDUqFHVvq7T6TBnzhw0bdoUHh4e6NmzZ71X6XRxcUFwcLDp4e/vX6/jkX154IEHEBISglWrViEzMxNbt26tcjblu+++Q1RUFFavXo0nn3wSgwcPRlxcHEpKSmo8vo+PT5WrWgBUOZuxbt066HQ6/Pjjj3j22WcxdOhQxMXFmQ0HGd0t/NwpIiICAJCSklLltZSUFNPrDeFutZSWliI1NbVKLe3bt8c//vEP7NixA7/99huuXbtmdkWM
QqHAgAEDsGjRIpw8eRILFy7E1q1ba7wSKzIyEj169MCqVatQXl6O1atXY+TIkdBoNGb7qdVqDB8+HB999BHOnz+PZ599Fl9++SXOnTtXny+DmevXr6OwsNBs25kzZwDAdKVYREQEzp49WyWonj592vQ6ALRo0QLXr19HVlaW1eoLCQnBtGnTsHbtWqSmpsLPzw8LFy602vGJbuc0QaUmM2bMwO7du7Fy5UocO3YMjz32GIYMGYKzZ8/W+Zhnz55FaGgooqKikJCQgMuXL1uxYpKbQqHAmDFjsG7dOvzvf/9DeXl5laBi/A309t849+7di927d9d4/BYtWiA3NxfHjh0zbUtLS8OaNWtqfI/c3FwsW7asyjE9PDyqDT936tatGwIDA7F06VLT0BYArF+/HqdOncKwYcNqPIa1xMXFQa1W4/333zfr8bPPPkNubq6plry8PJSXl5t9bvv27aFQKEw9VPefsfHKq9v7vJtx48Zhz549+Pzzz5GZmVnl+33r1i2z5wqFAh06dKj18WurvLwcH3/8sel5aWkpPv74YwQEBKBr164AgKFDhyI9Pd1sXk15eTk++OADeHp6ol+/fgCA0aNHQwiB+fPnV3mf27/etaHX66sMNQYGBiI0NNSq/RPdrlFcnnz58mUsW7YMly9fRmhoKABgzpw52LBhA5YtW4Y333zT4mP27NkTy5cvR5s2bZCWlob58+ejb9++SE5OhlartXYLZGXr1683/eZ5u969eyMqKsr0fNy4cfjggw8wd+5ctG/f3jT+b/Twww9j9erVGDVqFIYNG4bU1FQsXboU7dq1Q0FBwT1rePzxx/G3v/0No0aNwsyZM1FUVIQlS5agdevWZhNxBw0aZPot/tlnn0VBQQH++9//IjAwEGlpaWbH7Nq1K5YsWYIFCxagZcuWCAwMxEMPPVTlvVUqFd5++21MnjwZ/fr1w/jx402XJzdv3tw0rGQtN2/exIIFC6psj4yMREJCAl5++WXMnz8fQ4YMwYgRI5CSkoKPPvoI3bt3Ny2QtnXrVsyYMQOPPfYYWrdujfLycvzvf/+DUqnE6NGjAVSsOLxjxw4MGzYMERERyMjIwEcffYSwsDDcf//9NdY5duxYzJkzB3PmzIGvry/i4uLMXn/66aeRlZWFhx56CGFhYbh06RI++OADdOrUqcrPRnXOnDmDxMTEKtuDgoIwcOBA0/PQ0FC8/fbbuHjxIlq3bo1Vq1bhyJEj+OSTT6BSqQAAf/7zn/Hxxx9j0qRJOHjwIJo3b47vvvsOu3btwuLFi03/DvXv3x9PPvkk3n//fZw9exZDhgyBwWDAb7/9hv79+1t0f5/8/HyEhYVhzJgx6NixIzw9PbF582bs378f7777bq2PQ2QRGa84shkAYs2aNabnxksuPTw8zB4uLi5i7NixQgghTp06dc/LBwGIv/3tb3d9z+zsbOHl5SU+/fRTW7dH9XCvy5MBiGXLlpntbzAYRHh4uAAgFixYUOV4BoNBvPnmmyIiIkJoNBrRuXNn8dNPP1V76THuuDxZCCE2bdokYmNjhVqtFm3atBGJiYnVXp78448/ig4dOghXV1fRvHlz8fbbb4vPP/9cABCpqamm/dLT08WwYcOEVqsVAEyXKt95ebLRqlWrROfOnYVGoxG+vr4iISFBXL161WyfiRMnCg8Pjyq9V1dndYyX21b3GDBggGm/Dz/8UERHRwuVSiWCgoLE1KlTRXZ2tun1CxcuiClTpogWLVoIV1dX4evrK/r37y82b95s2mfLli3ikUceEaGhoUKtVovQ0FAxfvx4cebMmRrrNOrTp48AIJ5++ukqr3333Xdi0KBBIjAwUKjVatGsWTPx7LPPirS0tBqPe6+fu9svKTdezn3gwAHRq1cv4erqKiIiIsSHH35Y5Zg3btwQkydPFv7+/kKtVov27dtX+RkWouIy+nfeeUdER0cLtVotAgICRHx8vDh48KBZfdVddhwRESEmTpwohKi4
TPqvf/2r6Nixo9BqtcLDw0N07NhRfPTRRzX2T1RXkhAWnvtzAJIkYc2aNRg5ciQAYNWqVUhISMCJEyeqTBbz9PREcHAwSktLa5y5bryi4266d++OuLg4vPXWW/XugYgapwcffBCZmZlITk6WuxQiu9Aohn46d+4MvV6PjIwM9O3bt9p91Gp1vS4vLigowPnz5/Hkk0/W+RhERERkzmmCSkFBgdms+9TUVBw5cgS+vr5o3bo1EhIS8NRTT+Hdd99F586dcfPmTWzZsgUdOnSo08TBOXPmYPjw4YiIiMD169cxd+5cKJVKs8WXiIiIqH6cJqgcOHDAbMGu2bNnAwAmTpyI5cuXY9myZViwYAH+8pe/4Nq1a/D398d9992Hhx9+uE7vd/XqVYwfPx63bt1CQEAA7r//fuzZs+eeQ0NERERkGaeco0JERETOodGso0JERESOh0GFiIiI7JZDz1ExGAy4fv06tFptrZcOJyIiInkJIZCfn4/Q0NAqN9a8k0MHlevXryM8PFzuMoiIiKgOrly5grCwsHvu49BBxbhE9JUrV+Dl5WXVY5eVlWHTpk0YNGiQaclqZ8L+HJ+z9+js/QHO3yP7c3y26jEvLw/h4eG1uuWMQwcV43CPl5eXTYKKu7s7vLy8nPIHkP05Pmfv0dn7A5y/R/bn+GzdY22mbXAyLREREdktBhUiIiKyWwwqREREZLcYVIiIiMhuMagQERGR3WJQISIiIrvFoEJERER2i0GFiIiI7BaDChEREdktBpVq6A0Ce1OzcDBTwt7ULOgNQu6SiIiIGiWHXkLfFjYkp2H+upNIyy0BoMSXZw8gxNsVc4e3w5DYELnLIyIialR4RuU2G5LTMDXxUGVI+UN6bgmmJh7ChuQ0mSojIiJqnBhUKukNAvPXnUR1gzzGbfPXneQwEBERUQNiUKm0LzWrypmU2wkAabkl2Jea1XBFERERNXIMKpUy8u8eUuqyHxEREdUfg0qlQK2rVfcjIiKi+mNQqdQj0hch3q6Q7vK6BCDE2xU9In0bsiwiIqJGjUGlklIhYe7wdgBw17Ayd3g7KBV3e5WIiIisjUHlNkNiQ7DkiS4I9jYf3nFVKbDkiS5cR4WIiKiBccG3OwyJDcHAdsHYfS4Dq5L2Yt0VJcrKDegS4SN3aURERI0Oz6hUQ6mQ0DPSF3FhAp3DvaEXwLcHrspdFhERUaPDoFKD8d3DAQBf77sMAxd7IyIialAMKjWIjw2Ct5sKV7OLsePsTbnLISIialQYVGrgqlLi0S5NAQBf7b0sczVERESNC4NKLST0bAYA2Ho6A+n3WGafiIiIrItBpRZaBmrRI9IXeoPAqv1X5C6HiIio0WBQqSXjWZWV+y+jXG+QuRoiIqLGgUGllobEBsPXQ4203BL8msJJtURERA2BQaWWNC5KjOkaBgBYsY+TaomIiBoCg4oFxveoGP7ZlpKBq9lFMldDRETk/BhULBDp74E+Lf0gBDiploiIqAEwqFhoQo8IABVBpYyTaomIiGyKQcVCA9sFwd9TjYx8HbacuiF3OURERE6NQcVCahcFHutWcf8frlRLRERkWwwqdTC+ezNIEvDb2UxcvsVJtURERLbCoFIHzfzc0bdVAABeqkxERGRLDCp1NKHyUuXvDl5BaTkn1RIREdkCg0odDWgbiCAvDTILSrHpZLrc5RARETklBpU6UikVGGecVLuHwz9ERES2wKBSD+N6NINCAnZfuIULNwvkLoeIiMjpMKjUQ9MmbujfJhAA8DUn1RIREVkdg0o9TehZMan224NXUVKml7kaIiIi58KgUk8PtglEqLcrcorKsCGZk2qJiIisiUGlnpQKCY9XXqq8givVEhERWRWDihWM6x4OpULCvotZOHMjX+5yiIiInAaDihUEebliQHTFpFqeVSEiIrIeBhUrSbgvAgCw+tBVFJdyUi0REZE1MKhYSd+W/gj3dUNeSTl+OnZd7nKIiIicAoOKlSgUEh7vXjmp
lmuqEBERWQWDihWN7RYOF4WEw5dzcPJ6ntzlEBEROTwGFSsK0GowOCYYALBi3yWZqyEiInJ8sgYVvV6PV199FZGRkXBzc0OLFi3wxhtvQAghZ1n1Ylypdu3h6yjUlctcDRERkWNzkfPN3377bSxZsgRffPEFYmJicODAAUyePBne3t6YOXOmnKXVWa8oP0T6eyA1sxDrjl43LQZHRERElpP1jMrvv/+ORx55BMOGDUPz5s0xZswYDBo0CPv27ZOzrHpRKCSM7xEOAPiKa6oQERHVi6xBpXfv3tiyZQvOnDkDADh69Ch27tyJ+Ph4OcuqtzFdw6FWKnD8Wi6OX82VuxwiIiKHJevQz0svvYS8vDxER0dDqVRCr9dj4cKFSEhIqHZ/nU4HnU5nep6XV3FlTVlZGcrKyqxam/F4dTmuVi1hcEwg1h1LR+KeVCx4JMaqtVlDffpzBM7eH+D8PTp7f4Dz98j+HJ+terTkeJKQcebqypUr8de//hXvvPMOYmJicOTIEbzwwgtYtGgRJk6cWGX/efPmYf78+VW2r1ixAu7u7g1Rcq2dywM+OOECtULgja56uMoaCYmIiOxHUVERJkyYgNzcXHh5ed1zX1mDSnh4OF566SVMnz7dtG3BggVITEzE6dOnq+xf3RmV8PBwZGZm1tiopcrKypCUlISBAwdCpVJZ/PlCCMR/8DvO3yzEvOFtkVA5b8Ve1Lc/e+fs/QHO36Oz9wc4f4/sz/HZqse8vDz4+/vXKqjI+nt+UVERFArzaTJKpRIGg6Ha/TUaDTQaTZXtKpXKZj8k9Tl2Qs8IvP7TSaw6cA0Te0dCkiQrV1d/tvza2QNn7w9w/h6dvT/A+Xtkf47P2j1acixZJ9MOHz4cCxcuxM8//4yLFy9izZo1WLRoEUaNGiVnWVYzuksYNC4KnErLw+ErOXKXQ0RE5HBkDSoffPABxowZg2nTpqFt27aYM2cOnn32WbzxxhtylmU13u4qDOsQAgBYwUuViYiILCZrUNFqtVi8eDEuXbqE4uJinD9/HgsWLIBarZazLKtK6BkBAPjp2HXkFjnvzHAiIiJb4L1+bKxLsyaIDtaipMyA1Yevyl0OERGRQ2FQsTFJkkz3/1mx97JD38eIiIiooTGoNICRnZvCTaXE2YwC7L+YLXc5REREDoNBpQF4uaowomMoAGDF3ksyV0NEROQ4GFQaiHH455fkdGQVlspcDRERkWNgUGkgHcK8EdvUC6XlBnx/kJNqiYiIaoNBpYFIkoQJPSouVf56HyfVEhER1QaDSgMa0SkUHmolLmQWYveFW3KXQ0REZPcYVBqQp8YFIzs3BQB8xZVqiYiIasSg0sCMk2o3nUhHZoGuhr2JiIgaNwaVBhYT6o2O4U1Qphf49gAn1RIREd0Lg4oMEirPqny97zIMBk6qJSIiuhsGFRkM7xAKrasLLmcVYee5TLnLISIislsMKjJwUyvxaOWk2hWcVEtERHRXDCoymdCzYk2VpFM3cCOvROZqiIiI7BODikzaBGvRLcIHeoPAN/uvyF0OERGRXWJQkZHxUuWV+69Az0m1REREVTCoyGho+xA0cVfhWk4xtp/JkLscIiIiu8OgIiNXlRKju4QB4KRaIiKi6jCoyGx8j4rhn62nM3A9p1jmaoiIiOwLg4rMWgZ64r4oXxhExVwVIiIi+gODih0wXqq8av9llOsNMldDRERkPxhU7MDgmCD4eqhxI0+Hrac5qZaIiMiIQcUOaFyUeKxbxaTarziploiIyIRBxU6M714xqXbH2Zu4klUkczVERET2gUHFTjT398D9Lf0hBLByP8+qEBERAQwqdiWhcqXaVfuvooyTaomIiBhU7ElcuyAEaDXILNAh6eQNucshIiKSHYOKHVEpFRjbjSvVEhERGTGo2JnHuzeDJAE7z2XiYmah3OUQERHJikHFzoT7uqNf6wAAwNf7eFaFiIgaNwYVOzSh8v4/3x68Cl25XuZqiIiI5MOgYoce
ig5EsJcrsgpLsSE5Xe5yiIiIZMOgYodclAqM6x4OgJNqiYiocWNQsVOP9wiHQgL2pmbhXEaB3OUQERHJgkHFToV4u+Gh6CAAPKtCRESNF4OKHTOuVPv9oasoKeOkWiIianwYVOzYA60D0LSJG3KLy/DL8TS5yyEiImpwDCp2TKmQML5HxaTarzj8Q0REjRCDip0b2y0cLgoJBy9lIyU9X+5yiIiIGhSDip0L9HJFXFvjpNpLMldDRETUsBhUHEDCfRWTalcfuoai0nKZqyEiImo4DCoOoE8LfzTzdUe+rhw/HeWkWiIiajwYVByAQiFhfOX9f77ijQqJiKgRYVBxEI91C4NKKeHolRwkX8uVuxwiIqIGwaDiIPw9NRgcEwwAWMGzKkRE1EgwqDiQCZUr1f5w+BoKdJxUS0REzo9BxYH0ivJDlL8HCkv1+OHINbnLISIisjkGFQciSZLprMqKvZchhJC5IiIiIttiUHEwo7uEQe2iwInreTh2lZNqiYjIuTGoOBgfDzWGxlZMqv2KK9USEZGTY1BxQAn3RQAA1h1NQ15JmczVEBER2Q6DigPqFuGDVoGeKC7TY+1hTqolIiLnxaDigG6fVPvVHk6qJSIi58Wg4qAe7RwGV5UCKTfycehyttzlEBER2QSDioPydlfh4Q6hAICv9nKlWiIick4MKg7MOPzz07E05BSVylwNERGR9TGoOLDO4U3QNsQLpeUGfH+Ik2qJiMj5MKg4MPOVai9xUi0RETkdBhUHN7JTKNzVSpy/WYi9qVlyl0NERGRVDCoOTuuqwiOdKibVruCkWiIicjIMKk5gQo+KlWo3JKfjVoFO5mqIiIish0HFCbQP80b7pt4o1Rvw3cGrcpdDRERkNbIHlWvXruGJJ56An58f3Nzc0L59exw4cEDushxOQuWk2q/3XYbBwEm1RETkHGQNKtnZ2ejTpw9UKhXWr1+PkydP4t1334WPj4+cZTmk4R1D4alxwcVbRdh94Zbc5RAREVmFi5xv/vbbbyM8PBzLli0zbYuMjJSxIsfloXHByM6hSNxzGV/tvYQ+Lf3lLomIiKjeZA0qP/74IwYPHozHHnsM27dvR9OmTTFt2jQ888wz1e6v0+mg0/0xWTQvLw8AUFZWhrKyMqvWZjyetY9rS2O7NEXinsvYdOIGrmcVIECrueu+jtifJZy9P8D5e3T2/gDn75H9OT5b9WjJ8SRh4SphxcXFEELA3d0dAHDp0iWsWbMG7dq1w6BBgywq1NXVFQAwe/ZsPPbYY9i/fz9mzZqFpUuXYuLEiVX2nzdvHubPn19l+4oVK0z1NHbvHVfiYoGEh5vpMbAp56oQEZH9KSoqwoQJE5CbmwsvL6977mtxUBk0aBAeffRRPPfcc8jJyUF0dDRUKhUyMzOxaNEiTJ06tdbHUqvV6NatG37//XfTtpkzZ2L//v3YvXt3lf2rO6MSHh6OzMzMGhu1VFlZGZKSkjBw4ECoVCqrHtuWvj90DS+tOYGwJq7Y8mJfKBRStfs5an+15ez9Ac7fo7P3Bzh/j+zP8dmqx7y8PPj7+9cqqFg89HPo0CG89957AIDvvvsOQUFBOHz4ML7//nu89tprFgWVkJAQtGvXzmxb27Zt8f3331e7v0ajgUZTdThDpVLZ7IfElse2hUc6h+PN9Sm4mlOC3Rdz8GCbwHvu72j9WcrZ+wOcv0dn7w9w/h7Zn+Ozdo+WHMviq36Kioqg1WoBAJs2bcKjjz4KhUKB++67D5cuXbLoWH369EFKSorZtjNnziAiIsLSsqiSm1qJR7uEAeBKtURE5PgsDiotW7bE2rVrceXKFWzcuNE0LyUjI8Pi4ZcXX3wRe/bswZtvvolz585hxYoV+OSTTzB9+nRLy6LbGNdU2XI6A+m5JTJXQ0REVHcWB5XXXnsNc+bMQfPmzdGzZ0/06tULQMXZlc6dO1t0rO7du2PNmjX4+uuvERsbizfeeAOLFy9GQkKCpWXRbVoFadGj
uS/0BoFV+6/IXQ4REVGdWTxHZcyYMbj//vuRlpaGjh07mrYPGDAAo0aNsriAhx9+GA8//LDFn0f3NqFnM+y7mIWV+y9jev8WcFHKvggxERGRxer0v1dwcDA6d+4MhUKBvLw8rF27FlqtFtHR0dauj+poSGwwfNxVSMstwa8pN+Uuh4iIqE4sDipjx47Fhx9+CKBiTZVu3bph7Nix6NChw12v1qGG56pSYkzXykm1+ziploiIHJPFQWXHjh3o27cvAGDNmjUQQiAnJwfvv/8+FixYYPUCqe7G96iYVLstJQNXs4tkroaIiMhyFgeV3Nxc+Pr6AgA2bNiA0aNHw93dHcOGDcPZs2etXiDVXVSAJ3pF+UEIcFItERE5JIuDSnh4OHbv3o3CwkJs2LDBdHlydna2aUl8sh8J91WcVVm1/wrK9AaZqyEiIrKMxUHlhRdeQEJCAsLCwhAaGooHH3wQQMWQUPv27a1dH9XToHbB8PdUIyNfhy2nMuQuh4iIyCIWB5Vp06Zh9+7d+Pzzz7Fz504oFBWHiIqK4hwVO6R2UWBM13AAwFd7LVs5mIiISG4Wr6MCAN26dUO3bt0ghIAQApIkYdiwYdaujaxkQo9mWLr9PH47m4nLt4rQzI93miYiIsdQp3VUvvzyS7Rv3x5ubm5wc3NDhw4d8L///c/atZGVNPNzR99W/gCAr/fzUmUiInIcFgeVRYsWYerUqRg6dCi++eYbfPPNNxgyZAiee+45012Vyf4Y7//z7YErKC3npFoiInIMFg/9fPDBB1iyZAmeeuop07YRI0YgJiYG8+bNw4svvmjVAsk6BrQNQqBWg4x8HTadTMfgtgFyl0RERFQji8+opKWloXfv3lW29+7dG2lpaVYpiqxPpVRgXPeKSbUr9nL4h4iIHIPFQaVly5b45ptvqmxftWoVWrVqZZWiyDbGdQ+HJAG/n7+F1MxCucshIiKqkcVDP/Pnz8e4ceOwY8cO9OnTBwCwa9cubNmypdoAQ/YjzMcd/dsEYuvpDCzecg7+Ogl+qVno1TIQSoUkd3lERERVWBxURo8ejb179+K9997D2rVrAQBt27bFvn370LlzZ2vXR1bWJkiLracz8EvyDQBKfHn2AEK8XTF3eDsMiQ2RuzwiIiIzdbo8uWvXrkhMTMTBgwdx8OBBJCYmomnTpnjzzTetXR9Z0YbkNCzdfr7K9vTcEkxNPIQNyZxjRERE9qVOQaU6aWlpePXVV611OLIyvUFg/rqTENW8Ztw2f91J6A3V7UFERCQPqwUVsm/7UrOQllty19cFgLTcEuxLzWq4ooiIiGrAoNJIZOTfPaTUZT8iIqKGwKDSSARqXa26HxERUUOo9VU/s2fPvufrN2/erHcxZDs9In0R4u2K9NySauepSACCvV3RI9K3oUsjIiK6q1oHlcOHD9e4zwMPPFCvYsh2lAoJc4e3w9TEQ5CAasPK3OHtuJ4KERHZlVoHlW3bttmyDmoAQ2JDsOSJLpi/7qTZxFq1UsL74ztzHRUiIrI7Fi/4Ro5tSGwIBrYLxu5zGVi9dS9WX1SiVC/QJcJH7tKIiIiq4GTaRkipkNAz0hf9QgQ6NPUCAGw8cUPmqoiIiKpiUGnkBrULAgCuSktERHaJQaWRGxJTEVT2XMhCdmGpzNUQERGZY1Bp5CL83BEdrIXeIJB0ksM/RERkX+o0mTYnJwf79u1DRkYGDAaD2WtPPfWUVQqjhhMfG4LT6flYn5yGsd3D5S6HiIjIxOKgsm7dOiQkJKCgoABeXl6QpD/W3ZAkiUHFAcW3D8Z7m89g57lM5JWUwctVJXdJREREAOow9POXv/wFU6ZMQUFBAXJycpCdnW16ZGXxhnaOqFWgJ6ICPFCmF9h6KkPucoiIiEwsDirXrl3DzJkz4e7ubot6SAaSJCE+NhgAsJ5X/xARkR2xOKgMHjwYBw4csEUtJKP4ylVpt5+5iaLScpmrISIiqmDxHJVhw4bhr3/9K06ePIn2
7dtDpTKfzzBixAirFUcNJybUC2E+briaXYxfU25iaHsup09ERPKzOKg888wzAIDXX3+9ymuSJEGv19e/KmpwxuGf//6WivXJ6QwqRERkFywe+jEYDHd9MKQ4NuNNCbeeuoGSMn4viYhIflzwjUw6hzdBkJcGhaV67DybKXc5REREdQsq27dvx/Dhw9GyZUu0bNkSI0aMwG+//Wbt2qiBKRQShsQYr/5Jl7kaIiKiOgSVxMRExMXFwd3dHTNnzsTMmTPh5uaGAQMGYMWKFbaokRqQcfhn86kbKNMbatibiIjItiyeTLtw4UL861//wosvvmjaNnPmTCxatAhvvPEGJkyYYNUCqWH1iPSFn4catwpLsfv8LTzQOkDukoiIqBGz+IzKhQsXMHz48CrbR4wYgdTUVKsURfJRKiQMqryjMod/iIhIbhYHlfDwcGzZsqXK9s2bNyM8nDe0cwbG4Z+kk+nQG4TM1RARUWNm8dDPX/7yF8ycORNHjhxB7969AQC7du3C8uXL8e9//9vqBVLD6xXlBy9XF2QWlGL/xSzcF+Und0lERNRIWRxUpk6diuDgYLz77rv45ptvAABt27bFqlWr8Mgjj1i9QGp4ahcF4toFYfWha9iQnM6gQkREsrE4qADAqFGjMGrUKGvXQnYkPjbEFFRee7gdFApJ7pKIiKgR4oJvVK2+rfzhoVYiPa8ER67myF0OERE1UrUKKr6+vsjMrFip1MfHB76+vnd9kHNwVSnRPzoQALCBV/8QEZFMajX0895770Gr1Zo+liQOAzQG8bEh+OlYGtYnp+Hl+Gh+34mIqMHVKqhMnDjR9PGkSZNsVQvZmQfbBEDjosCVrGKcuJ6H2KbecpdERESNjMVzVJRKJTIyMqpsv3XrFpRKpVWKIvvgoXFBv8qVaTn8Q0REcrA4qAhR/QJgOp0OarW63gWRfYlvb7xJYZrMlRARUWNU68uT33//fQCAJEn49NNP4enpaXpNr9djx44diI6Otn6FJKuHooOgUko4f7MQZ2/ko1WQVu6SiIioEal1UHnvvfcAVJxRWbp0qdkwj1qtRvPmzbF06VLrV0iy8nZToU9Lf/yachPrk9MZVIiIqEHVOqgYbzjYv39/rF69Gj4+PjYriuxLfGywKajMHNBK7nKIiKgRsXiOyrZt2xhSGpmB7YKhVEg4lZaHS7cK5S6HiIgakTotoX/16lX8+OOPuHz5MkpLS81eW7RokVUKI/vh66FGz0hf/H7+FtYnp+O5fi3kLomIiBoJi4PKli1bMGLECERFReH06dOIjY3FxYsXIYRAly5dbFEj2YH42GAGFSIianAWD/28/PLLmDNnDo4fPw5XV1d8//33uHLlCvr164fHHnvMFjWSHRgcEwxJAo5eycH1nGK5yyEiokbC4qBy6tQpPPXUUwAAFxcXFBcXw9PTE6+//jrefvttqxdI9iHQyxVdm1XMTeLib0RE1FAsDioeHh6meSkhISE4f/686TXjjQvJOQ2JrVj8jUGFiIgaisVB5b777sPOnTsBAEOHDsVf/vIXLFy4EFOmTMF9991n9QLJfhiDyv5LWcjIL5G5GiIiagwsDiqLFi1Cz549AQDz58/HgAEDsGrVKjRv3hyfffaZ1Qsk+xHm444OYd4QAth04obc5RARUSNg8VU/UVFRpo89PDy4Gm0jMyQ2GMeu5mJDcjqeuC9C7nKIiMjJWXxGxVb++c9/QpIkvPDCC3KXQvcQHxsCANh94RayC0tr2JuIiKh+LA4qCoUCSqXyro+62L9/Pz7++GN06NChTp9PDSfS3wPRwVroDQJJpzj8Q0REtmXx0M+aNWvMnpeVleHw4cP44osvMH/+fIsLKCgoQEJCAv773/9iwYIFFn8+NbwhscE4nZ6PDcnpGNstXO5yiIjIiVkcVB555JEq28aMGYOYmBisWrUKf/rTnyw63vTp0zFs2DDExcXVGFR0Oh10Op3peV5eHoCKsFRWVmbR+9bEeDxr
H9de1Ke/gdH+WLz5LH47exNZ+cXQutbpTgw25ezfP8D5e3T2/gDn75H9OT5b9WjJ8SQhhLDGm164cAEdOnRAQUFBrT9n5cqVWLhwIfbv3w9XV1c8+OCD6NSpExYvXlzt/vPmzav2rM2KFSvg7u5e19LJQkIAbx5RIqNEwpMt9egWYJUfISIiaiSKioowYcIE5ObmwsvL6577WuVX4eLiYrz//vto2rRprT/nypUrmDVrFpKSkuDq6lqrz3n55Zcxe/Zs0/O8vDyEh4dj0KBBNTZqqbKyMiQlJWHgwIFQqVRWPbY9qG9/p9VnsXRHKm6oQzB0aCfrF1hPzv79A5y/R2fvD3D+Htmf47NVj8YRkdqwOKj4+PhAkiTTcyEE8vPz4e7ujsTExFof5+DBg8jIyDC7kaFer8eOHTvw4YcfQqfTVZmcq9FooNFoqhxLpVLZ7IfElse2B3Xtb1iHpli6IxU7zmaiTEhwV9vf8A/g/N8/wPl7dPb+AOfvkf05Pmv3aMmxLP7f5b333jMLKgqFAgEBAejZsyd8fHxqfZwBAwbg+PHjZtsmT56M6Oho/O1vf6vzFUTUMGKbeiHMxw1Xs4uxPeUm4tuHyF0SERE5IYuDyqRJk6zyxlqtFrGxsWbbPDw84OfnV2U72R9JkjAkJhif7kzF+uR0BhUiIrKJWgWVY8eO1fqAXAul8YhvXxFUtp7OgK5cD40Lz4IREZF11SqodOrUCZIkoaYLhCRJgl6vr3Mxv/76a50/lxpe53AfBHlpcCNPh51nMzGgbZDcJRERkZOpVVBJTU21dR3kgBQKCYNjgvHl7ktYn5zOoEJERFZXq6ASEcGbz1H1hsRWBJWkkzdQpjdApbSb20cREZETqPM1pSdPnsTly5dRWmp+Y7oRI0bUuyhyHD2a+8LXQ42swlLsuXALfVsFyF0SERE5EYuDyoULFzBq1CgcP37cbN6K8ZLl+sxRIcfjolRgULsgrNx/BeuT0xlUiIjIqiw+Tz9r1ixERkYiIyMD7u7uOHHiBHbs2IFu3bpxMmwjNSQ2GACw6UQ69AYup09ERNZjcVDZvXs3Xn/9dfj7+0OhUEChUOD+++/HW2+9hZkzZ9qiRrJzvVv4Q+vqgsyCUhy4mCV3OURE5EQsDip6vR5arRYA4O/vj+vXrwOomHCbkpJi3erIIahdFBhYecXP+uR0mashIiJnYnFQiY2NxdGjRwEAPXv2xL/+9S/s2rULr7/+OqKioqxeIDkG4/DPxhPpMHD4h4iIrMTioPKPf/wDBoMBAPD6668jNTUVffv2xS+//IL333/f6gWSY3igdQDc1Uqk5Zbg6NUcucshIiInYfFVP4MHDzZ93LJlS5w+fRpZWVlV7qpMjYurSon+0YH4+VgaNiSno3Oz2t+gkoiI6G4sPqOSmJiIwsJCs22+vr4MKYT4yuGf9cnpNd5ugYiIqDYsDiovvvgigoKCMGHCBPzyyy9cN4VM+rcJhMZFgctZRTiZlid3OURE5AQsDippaWlYuXIlJEnC2LFjERISgunTp+P333+3RX3kQDw0LnigdcWCbxt49Q8REVmBxUHFxcUFDz/8ML766itkZGTgvffew8WLF9G/f3+0aNHCFjWSA7l9+IeIiKi+6nyvHwBwd3fH4MGDkZ2djUuXLuHUqVPWqosc1IC2QVApJZzLKMC5jHy0DNTKXRIRETmwOt3qtqioCF999RWGDh2Kpk2bYvHixRg1ahROnDhh7frIwXi7qdC7hT8AYP1xnlUhIqL6sTioPP744wgMDMSLL76IqKgo/Prrrzh37hzeeOMNREdH26JGcjAc/iEiImuxeOhHqVTim2++weDBg6FUKm1REzm4ge2C8Mqa4ziZlofLt4rQzM9d7pKIiMhBWXxGxTjkw5BCd+PnqUHPSD8AwPrkNJmrISIiR1broDJ06FDk5uaanv/zn/9ETk6O6fmtW7fQrl07qxZHjiu+PYd/iIio/modVDZu3AidTmd6
/uabbyIrK8v0vLy8nHdPJpPBMRVB5ciVHKTlFstcDREROapaB5U7l0TnEul0L0FerugaUXG/n408q0JERHVUp8uTiWqDV/8QEVF91TqoSJJU5caDvBEh3Ytx+Gf/xSxkFuhq2JuIiKiqWl+eLITApEmToNFoAAAlJSV47rnn4OHhAQBm81eIACDc1x3tm3rj+LVcbDpxAxN6NpO7JCIicjC1DioTJ040e/7EE09U2eepp56qf0XkVIbEBuP4tVysT05jUCEiIovVOqgsW7bMlnWQk4qPDcY7G1Ow+/wt5BaVwdtdJXdJRETkQDiZlmwqKsATbYK0KDcIJJ26IXc5RETkYBhUyOaGVF79s4Gr1BIRkYUYVMjmjKvU7jibiQJduczVEBGRI2FQIZtrE6RFpL8HSssN2Ho6Q+5yiIjIgTCokM1JksThHyIiqhMGFWoQxlVqt52+ieJSvczVEBGRo2BQoQbRvqk3mjZxQ3GZHtvP3JS7HCIichAMKtQgOPxDRER1waBCDcY4/LPlVAZ05Rz+ISKimjGoUIPp0swHgVoN8nXl+P3cLbnLISIiB8CgQg1GoZBMd1Rez+EfIiKqBQYValDG4Z+kkzdQrjfIXA0REdk7BhVqUD0ifeHjrkJ2URn2pmbJXQ4REdk5BhVqUC5KBQa14/APERHVDoMKNbghlff+2XjiBgwGIXM1RERkzxhUqMH1aeEPrasLbubrcPByttzlEBGRHWNQoQandlEgrm0QAGD98XSZqyEiInvGoEKyMK5Su/FEOoTg8A8REVWPQYVk0a91ANzVSlzLKcaxq7lyl0NERHaKQYVk4apSon+bQADA+mQO/xARUfUYVEg2t9+kkMM/RERUHQYVkk3/6ECoXRS4eKsIp9Pz5S6HiIjsEIMKycZT44IHWgUA4PAPERFVj0GFZBV/2/APERHRnRhUSFZxbYPgopBw5kYBzt8skLscIiKyMwwqJCtvdxV6t/QHAGzg8A8REd2BQYVkZxz+4U0KiYjoTgwqJLtB7YKgkIDka3m4klUkdzlERGRHGFRIdn6eGvSI9AXA4R8iIjLHoEJ2IT42BACHf4iIyByDCtmFwTEV81QOXc5Bem6JzNUQEZG9YFAhuxDs7YouzZoAqLijMhEREcCgQnaEwz9ERHQnBhWyG8abFO5LzcKtAp3M1RARkT1gUCG7Ee7rjtimXjAIYNPJG3KXQ0REdkDWoPLWW2+he/fu0Gq1CAwMxMiRI5GSkiJnSSSzP4Z/OE+FiIhkDirbt2/H9OnTsWfPHiQlJaGsrAyDBg1CYWGhnGWRjIzDP7+fy0RuUZnM1RARkdxc5HzzDRs2mD1fvnw5AgMDcfDgQTzwwAMyVUVyahHgidZBnjhzowCbT93A6K5hcpdEREQykjWo3Ck3NxcA4OvrW+3rOp0OOt0fkyzz8vIAAGVlZSgrs+5v38bjWfu49sKe+xvUNhBnbhTgl+PXMaJDUJ2OYc/9WYuz9+js/QHO3yP7c3y26tGS40lCCGHVd68jg8GAESNGICcnBzt37qx2n3nz5mH+/PlVtq9YsQLu7u62LpEayLVC4F/HXOAiCSzsroerUu6KiIjImoqKijBhwgTk5ubCy8vrnvvaTVCZOnUq1q9fj507dyIsrPrT/dWdUQkPD0dmZmaNjVqqrKwMSUlJGDhwIFQqlVWPbQ/suT8hBAYu3oVLWUVYPLYDhrUPtvgY9tyftTh7j87eH+D8PbI/x2erHvPy8uDv71+roGIXQz8zZszATz/9hB07dtw1pACARqOBRqOpsl2lUtnsh8SWx7YH9tpffPsQLN1+HkmnbmJkl/A6H8de+7MmZ+/R2fsDnL9H9uf4rN2jJceS9aofIQRmzJiBNWvWYOvWrYiMjJSzHLIj8ZVX/2xLyUBJmV7maoiISC6yBpXp06cjMTERK1asgFarRXp6OtLT01FcXCxnWWQHOoR5o2kTNxSV6rH9zE25yyEiIpnIGlSWLFmC3Nxc
PPjggwgJCTE9Vq1aJWdZZAckSTLdUXkDF38jImq0ZJ2jYifzeMlOxbcPxue7UrH51A2UlhugduEdH4iIGhv+y092q2szHwRoNcgvKceu85lyl0NERDJgUCG7pVBIGBxTseDbhuMc/iEiaowYVMiuGW9SuOlkOsr1BpmrISKihsagQnatZ6QvfNxVyC4qw77ULLnLISKiBsagQnbNRanAwHYVwz/refUPEVGjw6BCds84/LPxRDoMBl4pRkTUmDCokN3r3dIPWo0LMvJ1OHQ5W+5yiIioATGokN3TuCgxoG0gAA7/EBE1Ngwq5BCGVA7/bEhO50KBRESNCIMKOYR+rQPgplLiWk4xjl/LlbscIiJqIAwq5BDc1Er0jw4AwOEfIqLGhEGFHAaHf4iIGh8GFXIYD0UHQu2iQGpmIVJu5MtdDhERNQAGFXIYnhoXPNDKHwCwnvf+ISJqFBhUyKHcPvxDRETOj0GFHMrAtkFwUUhIuZGPCzcL5C6HiIhsjEGFHIq3uwq9WvgB4NU/RESNAYMKOZx4Dv8QETUaDCrkcAbFBEEhAcev5eJKVpHc5RARkQ0xqJDD8ffUoHtzXwAVd1QmIiLnxaBCDik+NhgA56kQETk7BhVySMbLlA9eysaNvBKZqyEiIlthUCGHFOztis7NmgDg8A8RkTNjUCGHZRr+4Sq1REROi0GFHJbxMuW9qbdwq0AnczVERGQLDCrksMJ93RET6gWDAJJO3pC7HCIisgEGFXJovPqHiMi5MaiQQzNe/fP7+UzkFpfJXA0REVkbgwo5tJaBnmgV6IkyvcCWUxz+ISJyNgwq5PA4/ENE5LwYVMjhGYd/dpy5iUJduczVEBGRNTGokMNrG6JFhJ87dOUGbEvJkLscIiKyIgYVcniSJGEIh3+IiJwSgwo5BePib9tOZ6CkTC9zNUREZC0MKuQUOoZ5I9TbFUWleuw4c1PucoiIyEoYVMgpSJKEwZXDPxs4/ENE5DQYVMhpGId/kk7dQGm5QeZqiIjIGhhUyGl0jfCBv6cG+SXl2JOaJXc5RERkBQwq5DSUCgmDY4IAABtPcJVaIiJnwKBCTsU4/PNLcjr235SwNzULeoOQuSoiIqorF7kLILKmnOJSSBJQoNMj8ZwSiecOIMTbFXOHtzOtYEtERI6DZ1TIaWxITsPzKw5D3HECJT23BFMTD2FDcpo8hRERUZ0xqJBT0BsE5q87ieoGeYzb5q87yWEgIiIHw6BCTmFfahbSckvu+roAkJZbgn28GoiIyKFwjgo5hYz8u4eU281bdwIPtg5A2xAvtA3xQlSAB1RK5nUiInvFoEJOIVDrWqv9UtLzkZKeb3quVirQKsjTFFzahmjRLsQLTdzVtiqViIgswKBCTqFHpC9CvF2RnltS7TwVCYCfpxovDmyNlPR8nErLw+m0fOTrynHieh5OXM8z2z/E29UUXIwhprmfB5QKqUH6ISKiCgwq5BSUCglzh7fD1MRDkACzsGKMFgtGxppdoiyEwNXsYpxMy8Mp0yMfl7OKkJZbgrTcEmw9nWHa31WlQJtgL7S7LbxEB2uhdVU1SI9ERI0Rgwo5jSGxIVjyRBfMX3fSbGJt8F3WUZEkCeG+7gj3dcfgmGDT9vySMtNZl5NpFX+mpOejuEyPo1dycPRKjtlxwn3dEB1cEVyMISbcxx0Knn0hIqo3BhVyKkNiQzCwXTB2n8vApt/2YlDfnujVMtCiIRutqwrdmvuiW3Nf0za9QeDSrUKcqgwuxsf13BJcySrGlaxiJJ38Y9l+T40L2gRrzYaOooO1cFfzrxwRkSX4ryY5HaVCQs9IX9w6JdAz0tcq80qUCglRAZ6ICvDEsA5/nJnJKSo1Dy/peThzowAFunIcvJSNg5eyTftKEtDcz6MivFSegWkb6oVQb1dIkmU16g0Ce1OzcDBTgl9qlsVhjIjIUTCoENVDE3c1erXwQ68WfqZt5XoDLmQWVg4d5ZmCzM18HVIzC5GaWYhf
jqeb9vd2UyE6WFs5dFQRYFoFecJVpaz2PTckp902vKXEl2d5mwAicl4MKkRW5qJUoHWQFq2DtHikU1PT9swCndmk3VNpeTiXUYDc4jLsTc3C3tsWo1MqJET5e1S5bPrgpWxM++pQlSubjLcJWPJEF4YVInIqDCpEDcTfU4O+rQLQt1WAaZuuXI9zGQU4bRw+Sq8IMVmFpTibUYCzGQX48eh10/4KCXe9TYCEitsEDGwXzGEgInIaDCpEMtK4KBET6o2YUG/TNiEEMvJ1t102XRFizmcU4F63KjLeJmDQou1o7u8Bf08N/LVq+Hlo4K/VwN9TjQBPDfw8NWjipuJVSUTkEBhUiOyMJEkI8nJFkJcr+rcJNG3/7uAVzPn2WI2ffz6zEOczC++5j4tCgq+HujLMaODvoTaFGX9PzW0PNXw91HBp4NsMcLIwERkxqBA5iKZN3Gu135xBreHnqUFmvg6ZBTpkFpRW/lnxcW5xGcoNFWdtMvJ1QNq9jydJgI+72hRi/Dz/CDQVZ2huCzyeamhcqp8EXFucLExEt2NQIXIQtblNQLC3K6Y+2PKeZx9Kyw24VahDZn4pMgt1lYHmjzBz67aPswpLYRBAVmEpsgpLceZGQY11al1dTGdjbj87Yww0Ado/tntozP8J2pCchqmJnCxMRH9gUCFyELW5TcDc4e1qHCJRuygQ4u2GEG+3Gt9TbxDIKiytEmBuFlQEnVuFlWdqKj8u0wvkl5Qjv6QcqTUMPwGAm0ppCjB+Hmr8fuEWJwsTkRkGFSIHYultAupLqZAQoNUgQKupcV8hBHKLy8yHm8zO1tw+BKVDSZkBxWV6XM0uxtXs4pqPj4rJwvH/3oEof09TXYGVfxof/p4aqBp4Tg0R2Q6DCpGDscZtAmxBkiQ0cVejibsaLQPvva8QAoWletwynqHJL8WW0zfw7YGrNb7PmRsFNQ5B+XpUXOFUbZjx1CDQS4MAT1d4ublYvCqwtTj7hGFn748aDoMKkQOyxW0CGpIkSfDUuMBT44IIPw8AFSv01iaovBjXCk3c1biZr6t4FOiQkV+Cm5Vnb4zDVVmFpUi5kX/PY6mVCrOzMeZBxnx7fScJ387ZJww7e3/UsBhUiMgu1Hay8IyHWt01mBkMAtlFpRXhJe+PIGMMNcZAczNfh7yScpTqDbiWU4xrOTUPPXm7qUxBprohp0CtKwK0Na9R4+wThp29P6PGcMbIXnq0i6Dyn//8B++88w7S09PRsWNHfPDBB+jRo4fcZRFRA7LGZGGFQoJf5SXU0cH3fr+SMn2VIHOz8pJt4/bMyo9L9QbkFpcht7gM5zLuPezkopDgX81ZmUBtxYThV3844bQThvUGgfnrTjptf0aN4YyRPfUoe1BZtWoVZs+ejaVLl6Jnz55YvHgxBg8ejJSUFAQG1jDQTUROpSEnC7uqlAj3dUe4773XpzFOEjYbasqrLuCUILuoYo2a9LwSpOeV3PO41b4XKiYMd30jCRqVAgpJgoSKoTKFApAgQSFVPJekiv/4FZJUsZ9xOwCFAmafK0l/PP9j3z8+VlTO06k4VuX7SQAq389sP7NjmW+7mV9i9n27W39zf0xGq0At1C4KqJUKqCr/VLtIUCuVUCklqF0UUCkV0FT+qXZRVNkmR9hpDGeM7K1H2YPKokWL8Mwzz2Dy5MkAgKVLl+Lnn3/G559/jpdeeknm6oioodnbZOHbJwm3CtLec1/jGjU386sPM6fT83DxVlGN75lTXAbUPBrlsBL3XLbKcZQKqSLUGIOMWei5LeCYnktQu1QEIY1x/7vu+8fnaCpDklKS8Mqa5LueMQKAV384gZaBWrOf15p+cmuazy3VeISaj1Hbz9cbhN2d9ZM1qJSWluLgwYN4+eWXTdsUCgXi4uKwe/fuKvvrdDrodDrT87y8PABAWVkZysrKrFqb8XjWPq69YH+Oz9l77BKmxS1/gS5hWhj05TDo5a6oZhIAf3cX+Lu7oG2QR5XX96Zm
4YnPD9R4nIWPtENMqBeEAAQEDKLizI4QgEEICFT+KWC2j6HiyW37AMJgvr/p88yOUfkepvequg3ij/eofGrax3icy7cK8c2h6/dqDQDQK8oH3m5qlJYbUKY3oFRvQJleoLTcUO2225/fTm8Q0BsESsoMtfjuNIyb+TrELdoudxk2YzwrtvtcBnpG+tb5OJb8uyVrUMnMzIRer0dQUJDZ9qCgIJw+fbrK/m+99Rbmz59fZfumTZvg7l675cUtlZSUZJPj2gv25/icvUdn6s8ggCZqJXJKgep/zxZoogbcbxzDpQzb1iIBsN51TBWaqoFNtehvbOBN1OWXcSEAvQDKBVBuqHiYPReA3gCUC6nyz+r3Nb1mkO54fufrxs+RUG4A8suAnNKaC1dJArcv5XOPe4ne+eWxxi4WufN4BgHoRc09bvptL26dqns1RUU1n1k0kn3oxxIvv/wyZs+ebXqel5eH8PBwDBo0CF5eXlZ9r7KyMiQlJWHgwIFQqVRWPbY9YH+Oz9l7dNb+VM1v4PmVRwFUN2FYwoJHO2JwTFA1n+kYnLm/2p4RWza5e73ONsiptj0O6tuzXj0aR0RqQ9ag4u/vD6VSiRs3bphtv3HjBoKDq07Z12g00GiqrpCpUqls9g+ZLY9tD9if43P2Hp2tv4c7hcHFRdlgqws3NGfur1fLwFpdQu/Ilyo3VI+W/J2WNaio1Wp07doVW7ZswciRIwEABoMBW7ZswYwZM+QsjYjIZuxtwrC1OWt/1rrflj2zxx5lvyHG7Nmz8d///hdffPEFTp06halTp6KwsNB0FRARkTMyri7c1d8xVxeuibP2Z7yEPtjb1Wx7sLerU1yaDNhfj7LPURk3bhxu3ryJ1157Denp6ejUqRM2bNhQZYItERGRPXDWM0a3s6ceZQ8qADBjxgwO9RARkcNw9Ptt1Ya99Cj70A8RERHR3TCoEBERkd1iUCEiIiK7xaBCREREdotBhYiIiOwWgwoRERHZLQYVIiIislsMKkRERGS3GFSIiIjIbtnFyrR1JUTF7ZIsuV10bZWVlaGoqAh5eXlOdedWI/bn+Jy9R2fvD3D+Htmf47NVj8b/t43/j9+LQweV/Px8AEB4eLjMlRAREZGl8vPz4e3tfc99JFGbOGOnDAYDrl+/Dq1WC0my7j0I8vLyEB4ejitXrsDLy8uqx7YH7M/xOXuPzt4f4Pw9sj/HZ6sehRDIz89HaGgoFIp7z0Jx6DMqCoUCYWFhNn0PLy8vp/0BBNifM3D2Hp29P8D5e2R/js8WPdZ0JsWIk2mJiIjIbjGoEBERkd1iULkLjUaDuXPnQqPRyF2KTbA/x+fsPTp7f4Dz98j+HJ899OjQk2mJiIjIufGMChEREdktBhUiIiKyWwwqREREZLcYVIiIiMhuMajcYceOHRg+fDhCQ0MhSRLWrl0rd0lW9dZbb6F79+7QarUIDAzEyJEjkZKSIndZVrNkyRJ06NDBtDhRr169sH79ernLspl//vOfkCQJL7zwgtylWM28efMgSZLZIzo6Wu6yrOratWt44okn4OfnBzc3N7Rv3x4HDhyQuyyrad68eZXvoSRJmD59utylWYVer8err76KyMhIuLm5oUWLFnjjjTdqdd8aR5Gfn48XXngBERERcHNzQ+/evbF//35ZanHolWltobCwEB07dsSUKVPw6KOPyl2O1W3fvh3Tp09H9+7dUV5ejldeeQWDBg3CyZMn4eHhIXd59RYWFoZ//vOfaNWqFYQQ+OKLL/DII4/g8OHDiImJkbs8q9q/fz8+/vhjdOjQQe5SrC4mJgabN282PXdxcZ5/qrKzs9GnTx/0798f69evR0BAAM6ePQsfHx+5S7Oa/fv3Q6/Xm54nJydj4MCBeOyxx2SsynrefvttLFmyBF988QViYmJw4MABTJ48Gd7e3pg5c6bc5VnF008/jeTkZPzvf/9DaGgoEhMTERcXh5MnT6Jp06YNW4yguwIg1qxZI3cZNpWR
kSEAiO3bt8tdis34+PiITz/9VO4yrCo/P1+0atVKJCUliX79+olZs2bJXZLVzJ07V3Ts2FHuMmzmb3/7m7j//vvlLqNBzZo1S7Ro0UIYDAa5S7GKYcOGiSlTpphte/TRR0VCQoJMFVlXUVGRUCqV4qeffjLb3qVLF/H3v/+9wevh0E8jl5ubCwDw9fWVuRLr0+v1WLlyJQoLC9GrVy+5y7Gq6dOnY9iwYYiLi5O7FJs4e/YsQkNDERUVhYSEBFy+fFnukqzmxx9/RLdu3fDYY48hMDAQnTt3xn//+1+5y7KZ0tJSJCYmYsqUKVa/eaxcevfujS1btuDMmTMAgKNHj2Lnzp2Ij4+XuTLrKC8vh16vh6urq9l2Nzc37Ny5s8HrcZ7zqWQxg8GAF154AX369EFsbKzc5VjN8ePH0atXL5SUlMDT0xNr1qxBu3bt5C7LalauXIlDhw7JNl5saz179sTy5cvRpk0bpKWlYf78+ejbty+Sk5Oh1WrlLq/eLly4gCVLlmD27Nl45ZVXsH//fsycORNqtRoTJ06UuzyrW7t2LXJycjBp0iS5S7Gal156CXl5eYiOjoZSqYRer8fChQuRkJAgd2lWodVq0atXL7zxxhto27YtgoKC8PXXX2P37t1o2bJlwxfU4OdwHAicfOjnueeeExEREeLKlStyl2JVOp1OnD17Vhw4cEC89NJLwt/fX5w4cULusqzi8uXLIjAwUBw9etS0zdmGfu6UnZ0tvLy8nGb4TqVSiV69eplte/7558V9990nU0W2NWjQIPHwww/LXYZVff311yIsLEx8/fXX4tixY+LLL78Uvr6+Yvny5XKXZjXnzp0TDzzwgAAglEql6N69u0hISBDR0dENXguDyj04c1CZPn26CAsLExcuXJC7FJsbMGCA+POf/yx3GVaxZs0a0z8cxgcAIUmSUCqVory8XO4SbaJbt27ipZdekrsMq2jWrJn405/+ZLbto48+EqGhoTJVZDsXL14UCoVCrF27Vu5SrCosLEx8+OGHZtveeOMN0aZNG5kqsp2CggJx/fp1IYQQY8eOFUOHDm3wGjhHpZERQmDGjBlYs2YNtm7disjISLlLsjmDwQCdTid3GVYxYMAAHD9+HEeOHDE9unXrhoSEBBw5cgRKpVLuEq2uoKAA58+fR0hIiNylWEWfPn2qLAlw5swZREREyFSR7SxbtgyBgYEYNmyY3KVYVVFRERQK8/8+lUolDAaDTBXZjoeHB0JCQpCdnY2NGzfikUceafAaOEflDgUFBTh37pzpeWpqKo4cOQJfX180a9ZMxsqsY/r06VixYgV++OEHaLVapKenAwC8vb3h5uYmc3X19/LLLyM+Ph7NmjVDfn4+VqxYgV9//RUbN26UuzSr0Gq1VeYTeXh4wM/Pz2nmGc2ZMwfDhw9HREQErl+/jrlz50KpVGL8+PFyl2YVL774Inr37o0333wTY8eOxb59+/DJJ5/gk08+kbs0qzIYDFi2bBkmTpzoVJeXA8Dw4cOxcOFCNGvWDDExMTh8+DAWLVqEKVOmyF2a1WzcuBFCCLRp0wbnzp3DX//6V0RHR2Py5MkNX0yDn8Oxc9u2bRMAqjwmTpwod2lWUV1vAMSyZcvkLs0qpkyZIiIiIoRarRYBAQFiwIABYtOmTXKXZVPONkdl3LhxIiQkRKjVatG0aVMxbtw4ce7cObnLsqp169aJ2NhYodFoRHR0tPjkk0/kLsnqNm7cKACIlJQUuUuxury8PDFr1izRrFkz4erqKqKiosTf//53odPp5C7NalatWiWioqKEWq0WwcHBYvr06SInJ0eWWiQhnGgpPSIiInIqnKNCREREdotBhYiIiOwWgwoRERHZLQYVIiIislsMKkRERGS3GFSIiIjIbjGoEBERkd1iUCEihydJEtauXSt3GURkAwwqRFQvkyZNgiRJVR5DhgyRuzQicgLOdQMGIpLFkCFDsGzZMrNtGo1GpmqIyJnwjAoR1ZtG
o0FwcLDZw8fHB0DFsMySJUsQHx8PNzc3REVF4bvvvjP7/OPHj+Ohhx6Cm5sb/Pz88Oc//xkFBQVm+3z++eeIiYmBRqNBSEgIZsyYYfZ6ZmYmRo0aBXd3d7Rq1Qo//vij6bXs7GwkJCQgICAAbm5uaNWqVZVgRUT2iUGFiGzu1VdfxejRo3H06FEkJCTg8ccfx6lTpwAAhYWFGDx4MHx8fLB//358++232Lx5s1kQWbJkCaZPn44///nPOH78OH788Ue0bNnS7D3mz5+PsWPH4tixYxg6dCgSEhKQlZVlev+TJ09i/fr1OHXqFJYsWQJ/f/+G+wIQUd3JcitEInIaEydOFEqlUnh4eJg9Fi5cKISouGP3c889Z/Y5PXv2FFOnThVCCPHJJ58IHx8fUVBQYHr9559/FgqFQqSnpwshhAgNDRV///vf71oDAPGPf/zD9LygoEAAEOvXrxdCCDF8+HAxefJk6zRMRA2Kc1SIqN769++PJUuWmG3z9fU1fdyrVy+z13r16oUjR44AAE6dOoWOHTvCw8PD9HqfPn1gMBiQkpICSZJw/fp1DBgw4J41dOjQwfSxh4cHvLy8kJGRAQCYOnUqRo8ejUOHDmHQoEEYOXIkevfuXadeiahhMagQUb15eHhUGYqxFjc3t1rtp1KpzJ5LkgSDwQAAiI+Px6VLl/DLL78gKSkJAwYMwPTp0/F///d/Vq+XiKyLc1SIyOb27NlT5Xnbtm0BAG3btsXRo0dRWFhoen3Xrl1QKBRo06YNtFotmjdvji1bttSrhoCAAEycOBGJiYlYvHgxPvnkk3odj4gaBs+oEFG96XQ6pKenm21zcXExTVj99ttv0a1bN9x///346quvsG/fPnz22WcAgISEBMydOxcTJ07EvHnzcPPmTTz//PN48sknERQUBACYN28ennvuOQQGBiI+Ph75+fnYtWsXnn/++VrV99prr6Fr166IiYmBTqfDTz/9ZApKRGTfGFSIqN42bNiAkJAQs21t2rTB6dOnAVRckbNy5UpMmzYNISEh+Prrr9GuXTsAgLu7OzZu3IhZs2ahe/fucHd3x+jRo7Fo0SLTsSZOnIiSkhK89957mDNnDvz9/TFmzJha16dWq/Hyyy/j4sWLcHNzQ9++fbFy5UordE5EtiYJIYTcRRCR85IkCWvWrMHIkSPlLoWIHBDnqBAREZHdYlAhIiIiu8U5KkRkUxxdJqL64BkVIiIislsMKkRERGS3GFSIiIjIbjGoEBERkd1iUCEiIiK7xaBCREREdotBhYiIiOwWgwoRERHZLQYVIiIislv/D/KypAveE6obAAAAAElFTkSuQmCC",
520
+ "text/plain": [
521
+ "<Figure size 640x480 with 1 Axes>"
522
+ ]
523
+ },
524
+ "metadata": {},
525
+ "output_type": "display_data"
526
+ }
527
+ ],
528
+ "source": [
529
+ "import matplotlib.pyplot as plt\n",
530
+ "\n",
531
+ "# Given numbers\n",
532
+ "numbers = [\n",
533
+ " 9.069988300325349e-05,\n",
534
+ " 7.004399230936542e-05,\n",
535
+ " 9.137028973782435e-05,\n",
536
+ " 5.340397547115572e-05,\n",
537
+ " 5.0301870942348614e-05,\n",
538
+ " 9.043936188390944e-06,\n",
539
+ " 4.6886875679774676e-06,\n",
540
+ " 4.490133960644016e-06,\n",
541
+ " 6.136821866675746e-06,\n",
542
+ " 3.3243470625166083e-06,\n",
543
+ " 2.348009729757905e-06,\n",
544
+ " 2.1804094103572425e-06,\n",
545
+ " 1.958705070137512e-06,\n",
546
+ " 6.988730092416517e-07,\n",
547
+ " 5.00343162457284e-07,\n",
548
+ " 4.1343139400851214e-07,\n",
549
+ " 5.06081335061026e-07,\n",
550
+ " 7.039822662591178e-07,\n",
551
+ " 5.087575800644117e-07,\n",
552
+ " 2.0332389993882316e-08,\n",
553
+ " 1.718821529550496e-08,\n",
554
+ " 1.5028433608677005e-08,\n",
555
+ " 3.9828059072988253e-08,\n",
556
+ " 2.8266715190738978e-08,\n",
557
+ " 2.1497044144780375e-08,\n",
558
+ " 9.854548288501519e-09\n",
559
+ "]\n",
560
+ "\n",
561
+ "# Sorting the numbers in descending order\n",
562
+ "numbers.sort(reverse=True)\n",
563
+ "\n",
564
+ "# Selecting every 3rd number for plotting\n",
565
+ "selected_numbers = numbers[::3]\n",
566
+ "\n",
567
+ "# Creating x-axis values (epochs)\n",
568
+ "epochs = list(range(1, len(selected_numbers) + 1))\n",
569
+ "\n",
570
+ "# Plotting the curve\n",
571
+ "plt.plot(epochs, selected_numbers, marker='o', linestyle='-')\n",
572
+ "plt.xlabel('Epochs')\n",
573
+ "plt.ylabel('Evaluation Loss')\n",
574
+ "plt.title('Evaluation Loss vs Epochs')\n",
575
+ "plt.grid(True)\n",
576
+ "plt.show()\n"
577
+ ]
578
+ },
579
+ {
580
+ "cell_type": "code",
581
+ "execution_count": 5,
582
+ "metadata": {},
583
+ "outputs": [
584
+ {
585
+ "name": "stdout",
586
+ "output_type": "stream",
587
+ "text": [
588
+ "Running on local URL: http://127.0.0.1:7862\n",
589
+ "\n",
590
+ "To create a public link, set `share=True` in `launch()`.\n"
591
+ ]
592
+ },
593
+ {
594
+ "data": {
595
+ "text/html": [
596
+ "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
597
+ ],
598
+ "text/plain": [
599
+ "<IPython.core.display.HTML object>"
600
+ ]
601
+ },
602
+ "metadata": {},
603
+ "output_type": "display_data"
604
+ },
605
+ {
606
+ "data": {
607
+ "text/plain": []
608
+ },
609
+ "execution_count": 5,
610
+ "metadata": {},
611
+ "output_type": "execute_result"
612
+ }
613
+ ],
614
+ "source": [
615
+ "import gradio as gr\n",
616
+ "import time\n",
617
+ "def my_function(x, progress=gr.Progress()):\n",
618
+ " progress(0, desc=\"Starting...\")\n",
619
+ " time.sleep(1)\n",
620
+ " for i in progress.tqdm(range(100)):\n",
621
+ " time.sleep(0.1)\n",
622
+ " return x\n",
623
+ "with gr.Blocks() as demo:\n",
624
+ " a=gr.Textbox()\n",
625
+ " b=gr.TextArea(\"tt\")\n",
626
+ " btn=gr.Button(\"Test\")\n",
627
+ " btn.click(my_function,a,b)\n",
628
+ "demo.launch()"
629
+ ]
630
+ },
631
+ {
632
+ "cell_type": "code",
633
+ "execution_count": null,
634
+ "metadata": {},
635
+ "outputs": [],
636
+ "source": [
637
+ "if self.quantization == '8':\n",
638
+ " bnb_config = BitsAndBytesConfig( \n",
639
+ " load_in_8bit= True,\n",
640
+ " )\n",
641
+ "elif self.quantization == '4':\n",
642
+ " bnb_config = BitsAndBytesConfig(\n",
643
+ " load_in_4bit= True,\n",
644
+ " bnb_4bit_use_double_quant=True,\n",
645
+ " bnb_4bit_quant_type=\"nf4\", \n",
646
+ " bnb_4bit_compute_dtype=torch.bfloat16\n",
647
+ " )\n",
648
+ "model = AutoModelForCausalLM.from_pretrained(\n",
649
+ " base_model,\n",
650
+ " quantization_config=bnb_config,\n",
651
+ " torch_dtype=torch.bfloat16,\n",
652
+ " device_map=\"auto\",\n",
653
+ " trust_remote_code=True,\n",
654
+ " )"
655
+ ]
656
+ },
657
+ {
658
+ "cell_type": "code",
659
+ "execution_count": null,
660
+ "metadata": {},
661
+ "outputs": [],
662
+ "source": [
663
+ "config = LoraConfig(\n",
664
+ " r= lora_r if lora_r else 16,\n",
665
+ " lora_alpha= lora_alpha if lora_alpha else 32,\n",
666
+ " target_modules=[\"q_proj\", \"v_proj\",\"k_proj\",\"o_proj\",\"gate_proj\",\"up_proj\",\"down_proj\"], \n",
667
+ " lora_dropout= lora_dropout if lora_dropout else 0.05,\n",
668
+ " bias=\"none\",\n",
669
+ " task_type=\"CAUSAL_LM\")\n",
670
+ "\n",
671
+ "training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,\n",
672
+ " gradient_accumulation_steps=GRAD_ACC,\n",
673
+ " optim=OPTIMIZER,\n",
674
+ " learning_rate=LR,\n",
675
+ " fp16=True, \n",
676
+ " logging_steps=10,\n",
677
+ " num_train_epochs = epoch if epoch else 2,\n",
678
+ " output_dir=lora_output,\n",
679
+ " remove_unused_columns=True,\n",
680
+ " )"
681
+ ]
682
+ },
683
+ {
684
+ "cell_type": "code",
685
+ "execution_count": null,
686
+ "metadata": {},
687
+ "outputs": [],
688
+ "source": []
689
+ }
690
+ ],
691
+ "metadata": {
692
+ "kernelspec": {
693
+ "display_name": "lang",
694
+ "language": "python",
695
+ "name": "python3"
696
+ },
697
+ "language_info": {
698
+ "codemirror_mode": {
699
+ "name": "ipython",
700
+ "version": 3
701
+ },
702
+ "file_extension": ".py",
703
+ "mimetype": "text/x-python",
704
+ "name": "python",
705
+ "nbconvert_exporter": "python",
706
+ "pygments_lexer": "ipython3",
707
+ "version": "3.10.13"
708
+ }
709
+ },
710
+ "nbformat": 4,
711
+ "nbformat_minor": 2
712
+ }
utils.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import datetime
3
+ import gradio as gr
4
+ import os
5
def move_to(move, model_ans):
    """Fetch row *move* of a saved model-answer workbook and return it as
    three gradio Labels (ID, question, answer).

    Parameters
    ----------
    move : int-like
        Zero-based row index selected in the UI.
    model_ans : str
        Workbook file name inside the ``model_ans`` directory.

    Returns
    -------
    list[gr.Label]
        [id_label, question_label, answer_label] for the selected row;
        row 0 is shown (with an info toast) when *move* is out of range.
    """
    df_temp = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    # Validate BEFORE indexing: the original fetched the row first, so an
    # out-of-range index raised KeyError before the friendly Info ever showed.
    # The old condition (>= len+1) was also off by one: len(df)-1 is the
    # last valid positional label here.
    if not 0 <= int(move) < len(df_temp):
        gr.Info(f"Number of questions: {len(df_temp)}")
        move = 0
    row = df_temp.loc[move]
    return [
        gr.Label(value=str(int(row['id'])), label="ID"),
        gr.Label(value=row['question'], label="Question"),
        gr.Label(value=row['answer'], label="Answer"),
    ]
19
def display_table(path=r"data/demo_table_data.xlsx"):
    """Render the first two rows of an Excel sheet as a horizontally
    scrollable HTML table (used to preview the expected data format).

    Parameters
    ----------
    path : str
        Excel file to preview; defaults to the bundled demo sheet.

    Returns
    -------
    str
        An HTML ``<div>`` wrapping the two-row table, index column hidden.
    """
    preview = pd.read_excel(path).head(2)
    markup = preview.to_html(index=False)
    return "<div style='overflow-x:auto;'>" + markup + "</div>"
25
def current_time():
    """Return the current local time as a ``YYYY_MM_DD_HH_MM_SS`` string.

    Used as a unique, filesystem-safe suffix for generated file names.
    """
    return datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
31
# Used by the human-evaluation tab: serves a random question to answer.
def random_ques_ans2():
    """Pick a random question from the existing dataset.

    Returns
    -------
    tuple[str, str]
        (question_text, "") — the answer field starts empty for the
        human evaluator to fill in.
    """
    import random
    df = pd.read_excel(r"data/existing_dataset.xlsx")
    # randrange excludes the end point; the original randint(0, len(df))
    # could draw len(df), an index past the last row, and raise KeyError.
    idx = random.randrange(len(df))
    return df.loc[idx]['question'], ""
40
def score_report_bar():
    """Aggregate human-rating workbooks into a bar-chart-ready DataFrame.

    Scans every file in ``score_report``, derives a readable model name
    from the file name (lowercase letters kept, other characters after the
    first letter become spaces, boilerplate tokens stripped), and averages
    the ``rating`` column of each workbook.

    Returns
    -------
    pd.DataFrame
        Columns ``Model Name`` and ``Average Rating``, one row per file.
    """
    import os
    directory = "score_report"
    rows = []
    for fname in os.listdir(directory):
        # Keep a-z; once a letter has been seen, map every other character
        # to a single space so word boundaries survive.
        chars = []
        seen_letter = False
        for ch in fname:
            if 'a' <= ch <= 'z':
                seen_letter = True
                chars.append(ch)
            elif seen_letter:
                chars.append(' ')
        label = ''.join(chars)
        # Strip boilerplate tokens left over from the file-name convention
        # (order matters: double-space collapse runs after the removals).
        for old, new in (("model ans", ""), ("finetuned", ""), ("  ", " "), ("xlsx", "")):
            label = label.replace(old, new)
        scores = pd.read_excel(os.path.join(directory, fname))
        rows.append({
            "Model Name": label,
            "Average Rating": sum(scores["rating"]) / len(scores),
        })
    return pd.DataFrame(rows)
67
def parse_data(link, progress):
    """Crawl *link* for ~5 seconds, scrape the text of every discovered
    page, and save the cleaned corpus as a .docx under ``rag_data/``.

    Parameters
    ----------
    link : str
        Root URL to start crawling from.
    progress : gr.Progress
        Gradio progress tracker; its ``tqdm`` wraps the scrape loop.

    Side effects
    ------------
    Writes ``rag_data/<sanitized-link>.docx`` (directory created if absent).
    """
    from bs4 import BeautifulSoup
    import requests
    import re
    import time
    import os
    from docx import Document
    from langchain_community.document_loaders import WebBaseLoader

    seen = set()
    start_time = time.time()
    duration = 5  # seconds allotted to link discovery

    def get_links(url):
        # Collect absolute links from one page. A network failure now ends
        # only this branch of the crawl instead of aborting the whole run.
        try:
            response = requests.get(url)
        except requests.RequestException:
            return []
        soup = BeautifulSoup(response.text, 'lxml')
        found = []
        for anchor in soup.find_all('a'):
            href = anchor.get('href')
            if href is not None and href.startswith('http'):
                seen.add(href)
                found.append(href)
        return found

    def get_all_links(url):
        # Depth-first expansion, cut off once the time budget is spent.
        for child in get_links(url):
            if (time.time() - start_time) >= duration:
                return
            get_all_links(child)

    def page_text(url):
        # Full page text via LangChain's web loader.
        docs = WebBaseLoader(f"{url}").load()
        return docs[0].page_content

    seen.add(link)
    get_all_links(link)

    all_data = []
    for url in progress.tqdm(list(seen)):
        try:
            print("Link: ", url)
            all_data.append(page_text(url))
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C / SystemExit still work.
            print("pass")
            continue

    merged = re.sub(r'\n+', '\n\n', "\n".join(all_data))
    # Strip characters python-docx cannot store (control chars, surrogates).
    merged = re.sub(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+', '', merged)

    # The raw URL contains '/' and ':' — illegal in a file name — which made
    # the original document.save() fail for every real link. Sanitize it and
    # make sure the output directory exists.
    os.makedirs('rag_data', exist_ok=True)
    safe_name = re.sub(r'[^A-Za-z0-9._-]+', '_', link)
    document = Document()
    document.add_paragraph(merged)
    document.save(f'rag_data/{safe_name}.docx')
    print("Finished!!")
    return
122
def all_contri_ans(id, ques):
    """Collect every saved contributor answer for question *ques*.

    Parameters
    ----------
    id : Any
        Unused; kept so existing callers' positional arguments still line up.
    ques : str
        Exact question text to match against the saved workbooks.

    Returns
    -------
    list[str]
        All matching answers from the ``save_ques_ans`` workbooks, or a
        one-element "not available" message when there are none.
    """
    folder_path = 'save_ques_ans'
    fallback = ["This question's answer is not available."]
    frames = []
    for filename in os.listdir(folder_path):
        if filename.endswith((".xlsx", ".xls")):
            frames.append(pd.read_excel(os.path.join(folder_path, filename)))
    # pd.concat raises ValueError on an empty list; treat "no workbooks yet"
    # the same as "no matching answer" instead of crashing.
    if not frames:
        return fallback
    df_hum = pd.concat(frames, ignore_index=True)
    answers = [a for q, a in zip(df_hum['question'], df_hum['answer']) if q == ques]
    return answers if answers else fallback
139
+ import json
140
+ import os
141
+
142
def save_params_to_file(model_name, embedding_name, splitter_type_dropdown, chunk_size_slider,
                        chunk_overlap_slider, separator_textbox, max_tokens_slider, filename="params.txt"):
    """Persist the RAG UI configuration as JSON.

    Writes the same payload twice: to *filename* (local copy) and to
    ``deploy/params.txt`` (picked up by the deployment bundle).

    Parameters mirror the gradio widgets they come from; *filename*
    defaults to ``params.txt`` in the working directory.
    """
    params = {
        "model_name": model_name,
        "embedding_name": embedding_name,
        "splitter_type_dropdown": splitter_type_dropdown,
        "chunk_size_slider": chunk_size_slider,
        "chunk_overlap_slider": chunk_overlap_slider,
        "separator_textbox": separator_textbox,
        "max_tokens_slider": max_tokens_slider,
    }
    # The original hard-coded "deploy//params.txt" and crashed with
    # FileNotFoundError whenever deploy/ did not exist yet.
    os.makedirs("deploy", exist_ok=True)
    for target in (filename, os.path.join("deploy", "params.txt")):
        with open(target, 'w') as f:
            json.dump(params, f)
158
+
159
def load_params_from_file(filename="params.txt"):
    """Read a previously saved JSON parameter file.

    Parameters
    ----------
    filename : str
        Path written earlier by ``save_params_to_file``.

    Returns
    -------
    dict | None
        The parsed parameters, or ``None`` when the file does not exist.
    """
    if not os.path.exists(filename):
        return None
    with open(filename, 'r') as f:
        return json.load(f)