IIEleven11 commited on
Commit
effa0ab
·
verified ·
1 Parent(s): 75cb021

Upload folder using huggingface_hub

Browse files
conds.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6552d70568833628ba019c6b03459e77fe71ca197d5c560cef9411bee9d87f4e
3
+ size 107374
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124c430cd7cbb85ba7d9981a7bbae032b7b2e655c22dcd35f7f79beeadffdacb
3
+ size 2129654648
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427f2d90bb72f9af91eae0df1070a0620c8e0860c6b9e79e5ff3a194457aff97
3
+ size 4259421963
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:777940f94385b4bcd2af00fd0e4fdc0aafcac5f34ddb0b107b8396f79cb8ba56
3
+ size 14645
s3gen.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b78103c654207393955e4900aac14a12de8ef25f4b09424f1ef91941f161d4e
3
+ size 1056484620
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb68af34417121f1f3a3518a567c5ccf8f07990cbc83b85f3bed894c554af616
3
+ size 1465
t3_cfg.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d635376a2048b4c898e16570e550af61d73e1930ccd73661ba7e003aaf7541
3
+ size 2129653744
tokenizer.json ADDED
@@ -0,0 +1,1435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "special": true,
9
+ "content": "[STOP]",
10
+ "single_word": false,
11
+ "lstrip": false,
12
+ "rstrip": false,
13
+ "normalized": false
14
+ },
15
+ {
16
+ "id": 1,
17
+ "special": true,
18
+ "content": "[UNK]",
19
+ "single_word": false,
20
+ "lstrip": false,
21
+ "rstrip": false,
22
+ "normalized": false
23
+ },
24
+ {
25
+ "id": 2,
26
+ "special": true,
27
+ "content": "[SPACE]",
28
+ "single_word": false,
29
+ "lstrip": false,
30
+ "rstrip": false,
31
+ "normalized": false
32
+ },
33
+ {
34
+ "id": 255,
35
+ "special": true,
36
+ "content": "[START]",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": false
41
+ },
42
+ {
43
+ "id": 604,
44
+ "content": "[UH]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 605,
53
+ "content": "[UM]",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 606,
62
+ "content": "[giggle]",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 607,
71
+ "content": "[laughter]",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 608,
80
+ "content": "[guffaw]",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 609,
89
+ "content": "[inhale]",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 610,
98
+ "content": "[exhale]",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 611,
107
+ "content": "[sigh]",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
+ },
114
+ {
115
+ "id": 612,
116
+ "content": "[cry]",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
+ },
123
+ {
124
+ "id": 613,
125
+ "content": "[bark]",
126
+ "single_word": false,
127
+ "lstrip": false,
128
+ "rstrip": false,
129
+ "normalized": false,
130
+ "special": true
131
+ },
132
+ {
133
+ "id": 614,
134
+ "content": "[howl]",
135
+ "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": false,
138
+ "normalized": false,
139
+ "special": true
140
+ },
141
+ {
142
+ "id": 615,
143
+ "content": "[meow]",
144
+ "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
+ "normalized": false,
148
+ "special": true
149
+ },
150
+ {
151
+ "id": 616,
152
+ "content": "[singing]",
153
+ "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
+ "normalized": false,
157
+ "special": true
158
+ },
159
+ {
160
+ "id": 617,
161
+ "content": "[music]",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": false,
166
+ "special": true
167
+ },
168
+ {
169
+ "id": 618,
170
+ "content": "[whistle]",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": false,
175
+ "special": true
176
+ },
177
+ {
178
+ "id": 619,
179
+ "content": "[humming]",
180
+ "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
+ },
186
+ {
187
+ "id": 620,
188
+ "content": "[gasp]",
189
+ "single_word": false,
190
+ "lstrip": false,
191
+ "rstrip": false,
192
+ "normalized": false,
193
+ "special": true
194
+ },
195
+ {
196
+ "id": 621,
197
+ "content": "[groan]",
198
+ "single_word": false,
199
+ "lstrip": false,
200
+ "rstrip": false,
201
+ "normalized": false,
202
+ "special": true
203
+ },
204
+ {
205
+ "id": 622,
206
+ "content": "[whisper]",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
+ },
213
+ {
214
+ "id": 623,
215
+ "content": "[mumble]",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
+ },
222
+ {
223
+ "id": 624,
224
+ "content": "[sniff]",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
+ },
231
+ {
232
+ "id": 625,
233
+ "content": "[sneeze]",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
+ },
240
+ {
241
+ "id": 626,
242
+ "content": "[cough]",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
+ },
249
+ {
250
+ "id": 627,
251
+ "content": "[snore]",
252
+ "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
+ },
258
+ {
259
+ "id": 628,
260
+ "content": "[chew]",
261
+ "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
+ },
267
+ {
268
+ "id": 629,
269
+ "content": "[sip]",
270
+ "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
+ },
276
+ {
277
+ "id": 630,
278
+ "content": "[clear_throat]",
279
+ "single_word": false,
280
+ "lstrip": false,
281
+ "rstrip": false,
282
+ "normalized": false,
283
+ "special": true
284
+ },
285
+ {
286
+ "id": 631,
287
+ "content": "[kiss]",
288
+ "single_word": false,
289
+ "lstrip": false,
290
+ "rstrip": false,
291
+ "normalized": false,
292
+ "special": true
293
+ },
294
+ {
295
+ "id": 632,
296
+ "content": "[shhh]",
297
+ "single_word": false,
298
+ "lstrip": false,
299
+ "rstrip": false,
300
+ "normalized": false,
301
+ "special": true
302
+ },
303
+ {
304
+ "id": 633,
305
+ "content": "[gibberish]",
306
+ "single_word": false,
307
+ "lstrip": false,
308
+ "rstrip": false,
309
+ "normalized": false,
310
+ "special": true
311
+ },
312
+ {
313
+ "id": 634,
314
+ "content": "[fr]",
315
+ "single_word": false,
316
+ "lstrip": false,
317
+ "rstrip": false,
318
+ "normalized": false,
319
+ "special": true
320
+ },
321
+ {
322
+ "id": 635,
323
+ "content": "[es]",
324
+ "single_word": false,
325
+ "lstrip": false,
326
+ "rstrip": false,
327
+ "normalized": false,
328
+ "special": true
329
+ },
330
+ {
331
+ "id": 636,
332
+ "content": "[de]",
333
+ "single_word": false,
334
+ "lstrip": false,
335
+ "rstrip": false,
336
+ "normalized": false,
337
+ "special": true
338
+ },
339
+ {
340
+ "id": 637,
341
+ "content": "[it]",
342
+ "single_word": false,
343
+ "lstrip": false,
344
+ "rstrip": false,
345
+ "normalized": false,
346
+ "special": true
347
+ },
348
+ {
349
+ "id": 638,
350
+ "content": "[ipa]",
351
+ "single_word": false,
352
+ "lstrip": false,
353
+ "rstrip": false,
354
+ "normalized": false,
355
+ "special": true
356
+ },
357
+ {
358
+ "id": 639,
359
+ "content": "[end_of_label]",
360
+ "single_word": false,
361
+ "lstrip": false,
362
+ "rstrip": false,
363
+ "normalized": false,
364
+ "special": true
365
+ },
366
+ {
367
+ "id": 695,
368
+ "content": "[PLACEHOLDER55]",
369
+ "single_word": false,
370
+ "lstrip": false,
371
+ "rstrip": false,
372
+ "normalized": false,
373
+ "special": true
374
+ },
375
+ {
376
+ "id": 696,
377
+ "content": "[PLACEHOLDER56]",
378
+ "single_word": false,
379
+ "lstrip": false,
380
+ "rstrip": false,
381
+ "normalized": false,
382
+ "special": true
383
+ },
384
+ {
385
+ "id": 697,
386
+ "content": "[PLACEHOLDER57]",
387
+ "single_word": false,
388
+ "lstrip": false,
389
+ "rstrip": false,
390
+ "normalized": false,
391
+ "special": true
392
+ },
393
+ {
394
+ "id": 698,
395
+ "content": "[PLACEHOLDER58]",
396
+ "single_word": false,
397
+ "lstrip": false,
398
+ "rstrip": false,
399
+ "normalized": false,
400
+ "special": true
401
+ },
402
+ {
403
+ "id": 699,
404
+ "content": "[PLACEHOLDER59]",
405
+ "single_word": false,
406
+ "lstrip": false,
407
+ "rstrip": false,
408
+ "normalized": false,
409
+ "special": true
410
+ },
411
+ {
412
+ "id": 700,
413
+ "content": "[PLACEHOLDER60]",
414
+ "single_word": false,
415
+ "lstrip": false,
416
+ "rstrip": false,
417
+ "normalized": false,
418
+ "special": true
419
+ },
420
+ {
421
+ "id": 701,
422
+ "content": "[PLACEHOLDER61]",
423
+ "single_word": false,
424
+ "lstrip": false,
425
+ "rstrip": false,
426
+ "normalized": false,
427
+ "special": true
428
+ },
429
+ {
430
+ "id": 702,
431
+ "content": "[PLACEHOLDER62]",
432
+ "single_word": false,
433
+ "lstrip": false,
434
+ "rstrip": false,
435
+ "normalized": false,
436
+ "special": true
437
+ },
438
+ {
439
+ "id": 703,
440
+ "content": "[PLACEHOLDER63]",
441
+ "single_word": false,
442
+ "lstrip": false,
443
+ "rstrip": false,
444
+ "normalized": false,
445
+ "special": true
446
+ }
447
+ ],
448
+ "normalizer": null,
449
+ "pre_tokenizer": {
450
+ "type": "Whitespace"
451
+ },
452
+ "post_processor": null,
453
+ "decoder": null,
454
+ "model": {
455
+ "type": "BPE",
456
+ "dropout": null,
457
+ "unk_token": "[UNK]",
458
+ "continuing_subword_prefix": null,
459
+ "end_of_word_suffix": null,
460
+ "fuse_unk": false,
461
+ "vocab": {
462
+ "[STOP]": 0,
463
+ "[UNK]": 1,
464
+ "[SPACE]": 2,
465
+ "!": 3,
466
+ "'": 4,
467
+ "(": 5,
468
+ ")": 6,
469
+ ",": 7,
470
+ "-": 8,
471
+ ".": 9,
472
+ "/": 10,
473
+ ":": 11,
474
+ ";": 12,
475
+ "?": 13,
476
+ "a": 14,
477
+ "b": 15,
478
+ "c": 16,
479
+ "d": 17,
480
+ "e": 18,
481
+ "f": 19,
482
+ "g": 20,
483
+ "h": 21,
484
+ "i": 22,
485
+ "j": 23,
486
+ "k": 24,
487
+ "l": 25,
488
+ "m": 26,
489
+ "n": 27,
490
+ "o": 28,
491
+ "p": 29,
492
+ "q": 30,
493
+ "r": 31,
494
+ "s": 32,
495
+ "t": 33,
496
+ "u": 34,
497
+ "v": 35,
498
+ "w": 36,
499
+ "x": 37,
500
+ "y": 38,
501
+ "z": 39,
502
+ "th": 40,
503
+ "in": 41,
504
+ "the": 42,
505
+ "an": 43,
506
+ "er": 44,
507
+ "ou": 45,
508
+ "re": 46,
509
+ "on": 47,
510
+ "at": 48,
511
+ "ed": 49,
512
+ "en": 50,
513
+ "to": 51,
514
+ "ing": 52,
515
+ "and": 53,
516
+ "is": 54,
517
+ "as": 55,
518
+ "al": 56,
519
+ "or": 57,
520
+ "of": 58,
521
+ "ar": 59,
522
+ "it": 60,
523
+ "es": 61,
524
+ "he": 62,
525
+ "st": 63,
526
+ "le": 64,
527
+ "om": 65,
528
+ "se": 66,
529
+ "be": 67,
530
+ "ad": 68,
531
+ "ow": 69,
532
+ "ly": 70,
533
+ "ch": 71,
534
+ "wh": 72,
535
+ "that": 73,
536
+ "you": 74,
537
+ "li": 75,
538
+ "ve": 76,
539
+ "ac": 77,
540
+ "ti": 78,
541
+ "ld": 79,
542
+ "me": 80,
543
+ "was": 81,
544
+ "gh": 82,
545
+ "id": 83,
546
+ "ll": 84,
547
+ "wi": 85,
548
+ "ent": 86,
549
+ "for": 87,
550
+ "ay": 88,
551
+ "ro": 89,
552
+ "ver": 90,
553
+ "ic": 91,
554
+ "her": 92,
555
+ "ke": 93,
556
+ "his": 94,
557
+ "no": 95,
558
+ "ut": 96,
559
+ "un": 97,
560
+ "ir": 98,
561
+ "lo": 99,
562
+ "we": 100,
563
+ "ri": 101,
564
+ "ha": 102,
565
+ "with": 103,
566
+ "ght": 104,
567
+ "out": 105,
568
+ "im": 106,
569
+ "ion": 107,
570
+ "all": 108,
571
+ "ab": 109,
572
+ "one": 110,
573
+ "ne": 111,
574
+ "ge": 112,
575
+ "ould": 113,
576
+ "ter": 114,
577
+ "mo": 115,
578
+ "had": 116,
579
+ "ce": 117,
580
+ "she": 118,
581
+ "go": 119,
582
+ "sh": 120,
583
+ "ur": 121,
584
+ "am": 122,
585
+ "so": 123,
586
+ "pe": 124,
587
+ "my": 125,
588
+ "de": 126,
589
+ "are": 127,
590
+ "but": 128,
591
+ "ome": 129,
592
+ "fr": 130,
593
+ "ther": 131,
594
+ "fe": 132,
595
+ "su": 133,
596
+ "do": 134,
597
+ "con": 135,
598
+ "te": 136,
599
+ "ain": 137,
600
+ "ere": 138,
601
+ "po": 139,
602
+ "if": 140,
603
+ "they": 141,
604
+ "us": 142,
605
+ "ag": 143,
606
+ "tr": 144,
607
+ "now": 145,
608
+ "oun": 146,
609
+ "this": 147,
610
+ "have": 148,
611
+ "not": 149,
612
+ "sa": 150,
613
+ "il": 151,
614
+ "up": 152,
615
+ "thing": 153,
616
+ "from": 154,
617
+ "ap": 155,
618
+ "him": 156,
619
+ "ack": 157,
620
+ "ation": 158,
621
+ "ant": 159,
622
+ "our": 160,
623
+ "op": 161,
624
+ "like": 162,
625
+ "ust": 163,
626
+ "ess": 164,
627
+ "bo": 165,
628
+ "ok": 166,
629
+ "ul": 167,
630
+ "ind": 168,
631
+ "ex": 169,
632
+ "com": 170,
633
+ "some": 171,
634
+ "there": 172,
635
+ "ers": 173,
636
+ "co": 174,
637
+ "res": 175,
638
+ "man": 176,
639
+ "ard": 177,
640
+ "pl": 178,
641
+ "wor": 179,
642
+ "way": 180,
643
+ "tion": 181,
644
+ "fo": 182,
645
+ "ca": 183,
646
+ "were": 184,
647
+ "by": 185,
648
+ "ate": 186,
649
+ "pro": 187,
650
+ "ted": 188,
651
+ "ound": 189,
652
+ "own": 190,
653
+ "would": 191,
654
+ "ts": 192,
655
+ "what": 193,
656
+ "qu": 194,
657
+ "ally": 195,
658
+ "ight": 196,
659
+ "ck": 197,
660
+ "gr": 198,
661
+ "when": 199,
662
+ "ven": 200,
663
+ "can": 201,
664
+ "ough": 202,
665
+ "ine": 203,
666
+ "end": 204,
667
+ "per": 205,
668
+ "ous": 206,
669
+ "od": 207,
670
+ "ide": 208,
671
+ "know": 209,
672
+ "ty": 210,
673
+ "very": 211,
674
+ "si": 212,
675
+ "ak": 213,
676
+ "who": 214,
677
+ "about": 215,
678
+ "ill": 216,
679
+ "them": 217,
680
+ "est": 218,
681
+ "red": 219,
682
+ "ye": 220,
683
+ "could": 221,
684
+ "ong": 222,
685
+ "your": 223,
686
+ "their": 224,
687
+ "em": 225,
688
+ "just": 226,
689
+ "other": 227,
690
+ "into": 228,
691
+ "any": 229,
692
+ "whi": 230,
693
+ "um": 231,
694
+ "tw": 232,
695
+ "ast": 233,
696
+ "der": 234,
697
+ "did": 235,
698
+ "ie": 236,
699
+ "been": 237,
700
+ "ace": 238,
701
+ "ink": 239,
702
+ "ity": 240,
703
+ "back": 241,
704
+ "ting": 242,
705
+ "br": 243,
706
+ "more": 244,
707
+ "ake": 245,
708
+ "pp": 246,
709
+ "then": 247,
710
+ "sp": 248,
711
+ "el": 249,
712
+ "use": 250,
713
+ "bl": 251,
714
+ "said": 252,
715
+ "over": 253,
716
+ "get": 254,
717
+ "[START]": 255,
718
+ "\"": 256,
719
+ "#": 257,
720
+ "$": 258,
721
+ "%": 259,
722
+ "&": 260,
723
+ "*": 261,
724
+ "+": 262,
725
+ "0": 263,
726
+ "1": 264,
727
+ "2": 265,
728
+ "3": 266,
729
+ "4": 267,
730
+ "5": 268,
731
+ "6": 269,
732
+ "7": 270,
733
+ "8": 271,
734
+ "9": 272,
735
+ "<": 273,
736
+ "=": 274,
737
+ ">": 275,
738
+ "@": 276,
739
+ "A": 277,
740
+ "B": 278,
741
+ "C": 279,
742
+ "D": 280,
743
+ "E": 281,
744
+ "F": 282,
745
+ "G": 283,
746
+ "H": 284,
747
+ "I": 285,
748
+ "J": 286,
749
+ "K": 287,
750
+ "L": 288,
751
+ "M": 289,
752
+ "N": 290,
753
+ "O": 291,
754
+ "P": 292,
755
+ "Q": 293,
756
+ "R": 294,
757
+ "S": 295,
758
+ "T": 296,
759
+ "U": 297,
760
+ "V": 298,
761
+ "W": 299,
762
+ "X": 300,
763
+ "Y": 301,
764
+ "Z": 302,
765
+ "[": 303,
766
+ "\\": 304,
767
+ "]": 305,
768
+ "^": 306,
769
+ "_": 307,
770
+ "`": 308,
771
+ "{": 309,
772
+ "|": 310,
773
+ "}": 311,
774
+ "~": 312,
775
+ "‐": 313,
776
+ "‑": 314,
777
+ "‒": 315,
778
+ "–": 316,
779
+ "—": 317,
780
+ "―": 318,
781
+ "‖": 319,
782
+ "‗": 320,
783
+ "‘": 321,
784
+ "’": 322,
785
+ "‚": 323,
786
+ "‛": 324,
787
+ "“": 325,
788
+ "”": 326,
789
+ "„": 327,
790
+ "‟": 328,
791
+ " ": 329,
792
+ "¡": 330,
793
+ "¢": 331,
794
+ "£": 332,
795
+ "¤": 333,
796
+ "¥": 334,
797
+ "¦": 335,
798
+ "§": 336,
799
+ "¨": 337,
800
+ "©": 338,
801
+ "ª": 339,
802
+ "«": 340,
803
+ "¬": 341,
804
+ "­": 342,
805
+ "®": 343,
806
+ "¯": 344,
807
+ "°": 345,
808
+ "±": 346,
809
+ "²": 347,
810
+ "³": 348,
811
+ "´": 349,
812
+ "µ": 350,
813
+ "¶": 351,
814
+ "·": 352,
815
+ "¸": 353,
816
+ "¹": 354,
817
+ "º": 355,
818
+ "»": 356,
819
+ "¼": 357,
820
+ "½": 358,
821
+ "¾": 359,
822
+ "¿": 360,
823
+ "À": 361,
824
+ "Á": 362,
825
+ "Â": 363,
826
+ "Ã": 364,
827
+ "Ä": 365,
828
+ "Å": 366,
829
+ "Æ": 367,
830
+ "Ç": 368,
831
+ "È": 369,
832
+ "É": 370,
833
+ "Ê": 371,
834
+ "Ë": 372,
835
+ "Ì": 373,
836
+ "Í": 374,
837
+ "Î": 375,
838
+ "Ï": 376,
839
+ "Ð": 377,
840
+ "Ñ": 378,
841
+ "Ò": 379,
842
+ "Ó": 380,
843
+ "Ô": 381,
844
+ "Õ": 382,
845
+ "Ö": 383,
846
+ "×": 384,
847
+ "Ø": 385,
848
+ "Ù": 386,
849
+ "Ú": 387,
850
+ "Û": 388,
851
+ "Ü": 389,
852
+ "Ý": 390,
853
+ "Þ": 391,
854
+ "ß": 392,
855
+ "à": 393,
856
+ "á": 394,
857
+ "â": 395,
858
+ "ã": 396,
859
+ "ä": 397,
860
+ "å": 398,
861
+ "æ": 399,
862
+ "ç": 400,
863
+ "è": 401,
864
+ "é": 402,
865
+ "ê": 403,
866
+ "ë": 404,
867
+ "ì": 405,
868
+ "í": 406,
869
+ "î": 407,
870
+ "ï": 408,
871
+ "ð": 409,
872
+ "ñ": 410,
873
+ "ò": 411,
874
+ "ó": 412,
875
+ "ô": 413,
876
+ "õ": 414,
877
+ "ö": 415,
878
+ "÷": 416,
879
+ "ø": 417,
880
+ "ù": 418,
881
+ "ú": 419,
882
+ "û": 420,
883
+ "ü": 421,
884
+ "ý": 422,
885
+ "þ": 423,
886
+ "ÿ": 424,
887
+ "ɐ": 425,
888
+ "ɑ": 426,
889
+ "ɒ": 427,
890
+ "ɓ": 428,
891
+ "ɔ": 429,
892
+ "ɕ": 430,
893
+ "ɖ": 431,
894
+ "ɗ": 432,
895
+ "ɘ": 433,
896
+ "ə": 434,
897
+ "ɚ": 435,
898
+ "ɛ": 436,
899
+ "ɜ": 437,
900
+ "ɝ": 438,
901
+ "ɞ": 439,
902
+ "ɟ": 440,
903
+ "ɠ": 441,
904
+ "ɡ": 442,
905
+ "ɢ": 443,
906
+ "ɣ": 444,
907
+ "ɤ": 445,
908
+ "ɥ": 446,
909
+ "ɦ": 447,
910
+ "ɧ": 448,
911
+ "ɨ": 449,
912
+ "ɩ": 450,
913
+ "ɪ": 451,
914
+ "ɫ": 452,
915
+ "ɬ": 453,
916
+ "ɭ": 454,
917
+ "ɮ": 455,
918
+ "ɯ": 456,
919
+ "ɰ": 457,
920
+ "ɱ": 458,
921
+ "ɲ": 459,
922
+ "ɳ": 460,
923
+ "ɴ": 461,
924
+ "ɵ": 462,
925
+ "ɶ": 463,
926
+ "ɷ": 464,
927
+ "ɸ": 465,
928
+ "ɹ": 466,
929
+ "ɺ": 467,
930
+ "ɻ": 468,
931
+ "ɼ": 469,
932
+ "ɽ": 470,
933
+ "ɾ": 471,
934
+ "ɿ": 472,
935
+ "ʀ": 473,
936
+ "ʁ": 474,
937
+ "ʂ": 475,
938
+ "ʃ": 476,
939
+ "ʄ": 477,
940
+ "ʅ": 478,
941
+ "ʆ": 479,
942
+ "ʇ": 480,
943
+ "ʈ": 481,
944
+ "ʉ": 482,
945
+ "ʊ": 483,
946
+ "ʋ": 484,
947
+ "ʌ": 485,
948
+ "ʍ": 486,
949
+ "ʎ": 487,
950
+ "ʏ": 488,
951
+ "ʐ": 489,
952
+ "ʑ": 490,
953
+ "ʒ": 491,
954
+ "ʓ": 492,
955
+ "ʔ": 493,
956
+ "ʕ": 494,
957
+ "ʖ": 495,
958
+ "ʗ": 496,
959
+ "ʘ": 497,
960
+ "ʙ": 498,
961
+ "ʚ": 499,
962
+ "ʛ": 500,
963
+ "ʜ": 501,
964
+ "ʝ": 502,
965
+ "ʞ": 503,
966
+ "ʟ": 504,
967
+ "ʠ": 505,
968
+ "ʡ": 506,
969
+ "ʢ": 507,
970
+ "ʣ": 508,
971
+ "ʤ": 509,
972
+ "ʥ": 510,
973
+ "ʦ": 511,
974
+ "ʧ": 512,
975
+ "ʨ": 513,
976
+ "ʩ": 514,
977
+ "ʪ": 515,
978
+ "ʫ": 516,
979
+ "ʬ": 517,
980
+ "ʭ": 518,
981
+ "ʮ": 519,
982
+ "ʯ": 520,
983
+ "ʰ": 521,
984
+ "ʱ": 522,
985
+ "ʲ": 523,
986
+ "ʳ": 524,
987
+ "ʴ": 525,
988
+ "ʵ": 526,
989
+ "ʶ": 527,
990
+ "ʷ": 528,
991
+ "ʸ": 529,
992
+ "ʹ": 530,
993
+ "ʺ": 531,
994
+ "ʻ": 532,
995
+ "ʼ": 533,
996
+ "ʽ": 534,
997
+ "ʾ": 535,
998
+ "ʿ": 536,
999
+ "ˀ": 537,
1000
+ "ˁ": 538,
1001
+ "˂": 539,
1002
+ "˃": 540,
1003
+ "˄": 541,
1004
+ "˅": 542,
1005
+ "ˆ": 543,
1006
+ "ˇ": 544,
1007
+ "ˈ": 545,
1008
+ "ˉ": 546,
1009
+ "ˊ": 547,
1010
+ "ˋ": 548,
1011
+ "ˌ": 549,
1012
+ "ˍ": 550,
1013
+ "ˎ": 551,
1014
+ "ˏ": 552,
1015
+ "ː": 553,
1016
+ "ˑ": 554,
1017
+ "˒": 555,
1018
+ "˓": 556,
1019
+ "˔": 557,
1020
+ "˕": 558,
1021
+ "˖": 559,
1022
+ "˗": 560,
1023
+ "˘": 561,
1024
+ "˙": 562,
1025
+ "˚": 563,
1026
+ "˛": 564,
1027
+ "˜": 565,
1028
+ "˝": 566,
1029
+ "˞": 567,
1030
+ "˟": 568,
1031
+ "ˠ": 569,
1032
+ "ˡ": 570,
1033
+ "ˢ": 571,
1034
+ "ˣ": 572,
1035
+ "ˤ": 573,
1036
+ "˥": 574,
1037
+ "˦": 575,
1038
+ "˧": 576,
1039
+ "˨": 577,
1040
+ "˩": 578,
1041
+ "˪": 579,
1042
+ "˫": 580,
1043
+ "ˬ": 581,
1044
+ "˭": 582,
1045
+ "ˮ": 583,
1046
+ "˯": 584,
1047
+ "˰": 585,
1048
+ "˱": 586,
1049
+ "˲": 587,
1050
+ "˳": 588,
1051
+ "˴": 589,
1052
+ "˵": 590,
1053
+ "˶": 591,
1054
+ "˷": 592,
1055
+ "˸": 593,
1056
+ "˹": 594,
1057
+ "˺": 595,
1058
+ "˻": 596,
1059
+ "˼": 597,
1060
+ "˽": 598,
1061
+ "˾": 599,
1062
+ "˿": 600,
1063
+ "ā": 601,
1064
+ "ō": 602,
1065
+ "…": 603,
1066
+ "[UH]": 604,
1067
+ "[UM]": 605,
1068
+ "[giggle]": 606,
1069
+ "[laughter]": 607,
1070
+ "[guffaw]": 608,
1071
+ "[inhale]": 609,
1072
+ "[exhale]": 610,
1073
+ "[sigh]": 611,
1074
+ "[cry]": 612,
1075
+ "[bark]": 613,
1076
+ "[howl]": 614,
1077
+ "[meow]": 615,
1078
+ "[singing]": 616,
1079
+ "[music]": 617,
1080
+ "[whistle]": 618,
1081
+ "[humming]": 619,
1082
+ "[gasp]": 620,
1083
+ "[groan]": 621,
1084
+ "[whisper]": 622,
1085
+ "[mumble]": 623,
1086
+ "[sniff]": 624,
1087
+ "[sneeze]": 625,
1088
+ "[cough]": 626,
1089
+ "[snore]": 627,
1090
+ "[chew]": 628,
1091
+ "[sip]": 629,
1092
+ "[clear_throat]": 630,
1093
+ "[kiss]": 631,
1094
+ "[shhh]": 632,
1095
+ "[gibberish]": 633,
1096
+ "[fr]": 634,
1097
+ "[es]": 635,
1098
+ "[de]": 636,
1099
+ "[it]": 637,
1100
+ "[ipa]": 638,
1101
+ "[end_of_label]": 639,
1102
+ "ŋ": 640,
1103
+ "ᵻ": 641,
1104
+ "θ": 642,
1105
+ "̩": 643,
1106
+ "\u0303": 644,
1107
+ "ɑː": 645,
1108
+ "iː": 646,
1109
+ "uː": 647,
1110
+ "ɜː": 648,
1111
+ "ɔː": 649,
1112
+ "oː": 650,
1113
+ "eɪ": 651,
1114
+ "oʊ": 652,
1115
+ "aɪ": 653,
1116
+ "aʊ": 654,
1117
+ "ɔɪ": 655,
1118
+ "dʒ": 656,
1119
+ "tʃ": 657,
1120
+ "ɪŋ": 658,
1121
+ "ᵻd": 659,
1122
+ "ˈiː": 660,
1123
+ "ˌiː": 661,
1124
+ "ˈɪ": 662,
1125
+ "ˌɪ": 663,
1126
+ "ˈeɪ": 664,
1127
+ "ˌeɪ": 665,
1128
+ "ˈɛ": 666,
1129
+ "ˌɛ": 667,
1130
+ "ˈæ": 668,
1131
+ "ˌæ": 669,
1132
+ "ˈɑː": 670,
1133
+ "ˌɑː": 671,
1134
+ "ˈɔː": 672,
1135
+ "ˌɔː": 673,
1136
+ "oːɹ": 674,
1137
+ "ˈoːɹ": 675,
1138
+ "ˌoːɹ": 676,
1139
+ "ˈoʊ": 677,
1140
+ "ˌoʊ": 678,
1141
+ "ˈʊ": 679,
1142
+ "ˌʊ": 680,
1143
+ "ˈuː": 681,
1144
+ "ˌuː": 682,
1145
+ "ˈɜː": 683,
1146
+ "ˌɜː": 684,
1147
+ "ˈʌ": 685,
1148
+ "ˌʌ": 686,
1149
+ "ˈaɪ": 687,
1150
+ "ˌaɪ": 688,
1151
+ "ˈaʊ": 689,
1152
+ "ˌaʊ": 690,
1153
+ "ˈɔɪ": 691,
1154
+ "ˌɔɪ": 692,
1155
+ "ˈɚ": 693,
1156
+ "ˌɐ": 694,
1157
+ "[PLACEHOLDER55]": 695,
1158
+ "[PLACEHOLDER56]": 696,
1159
+ "[PLACEHOLDER57]": 697,
1160
+ "[PLACEHOLDER58]": 698,
1161
+ "[PLACEHOLDER59]": 699,
1162
+ "[PLACEHOLDER60]": 700,
1163
+ "[PLACEHOLDER61]": 701,
1164
+ "[PLACEHOLDER62]": 702,
1165
+ "[PLACEHOLDER63]": 703
1166
+ },
1167
+ "merges": [
1168
+ "t h",
1169
+ "i n",
1170
+ "th e",
1171
+ "a n",
1172
+ "e r",
1173
+ "o u",
1174
+ "r e",
1175
+ "o n",
1176
+ "a t",
1177
+ "e d",
1178
+ "e n",
1179
+ "t o",
1180
+ "in g",
1181
+ "an d",
1182
+ "i s",
1183
+ "a s",
1184
+ "a l",
1185
+ "o r",
1186
+ "o f",
1187
+ "a r",
1188
+ "i t",
1189
+ "e s",
1190
+ "h e",
1191
+ "s t",
1192
+ "l e",
1193
+ "o m",
1194
+ "s e",
1195
+ "b e",
1196
+ "a d",
1197
+ "o w",
1198
+ "l y",
1199
+ "c h",
1200
+ "w h",
1201
+ "th at",
1202
+ "y ou",
1203
+ "l i",
1204
+ "v e",
1205
+ "a c",
1206
+ "t i",
1207
+ "l d",
1208
+ "m e",
1209
+ "w as",
1210
+ "g h",
1211
+ "i d",
1212
+ "l l",
1213
+ "w i",
1214
+ "en t",
1215
+ "f or",
1216
+ "a y",
1217
+ "r o",
1218
+ "v er",
1219
+ "i c",
1220
+ "h er",
1221
+ "k e",
1222
+ "h is",
1223
+ "n o",
1224
+ "u t",
1225
+ "u n",
1226
+ "i r",
1227
+ "l o",
1228
+ "w e",
1229
+ "r i",
1230
+ "h a",
1231
+ "wi th",
1232
+ "gh t",
1233
+ "ou t",
1234
+ "i m",
1235
+ "i on",
1236
+ "al l",
1237
+ "a b",
1238
+ "on e",
1239
+ "n e",
1240
+ "g e",
1241
+ "ou ld",
1242
+ "t er",
1243
+ "m o",
1244
+ "h ad",
1245
+ "c e",
1246
+ "s he",
1247
+ "g o",
1248
+ "s h",
1249
+ "u r",
1250
+ "a m",
1251
+ "s o",
1252
+ "p e",
1253
+ "m y",
1254
+ "d e",
1255
+ "a re",
1256
+ "b ut",
1257
+ "om e",
1258
+ "f r",
1259
+ "the r",
1260
+ "f e",
1261
+ "s u",
1262
+ "d o",
1263
+ "c on",
1264
+ "t e",
1265
+ "a in",
1266
+ "er e",
1267
+ "p o",
1268
+ "i f",
1269
+ "the y",
1270
+ "u s",
1271
+ "a g",
1272
+ "t r",
1273
+ "n ow",
1274
+ "ou n",
1275
+ "th is",
1276
+ "ha ve",
1277
+ "no t",
1278
+ "s a",
1279
+ "i l",
1280
+ "u p",
1281
+ "th ing",
1282
+ "fr om",
1283
+ "a p",
1284
+ "h im",
1285
+ "ac k",
1286
+ "at ion",
1287
+ "an t",
1288
+ "ou r",
1289
+ "o p",
1290
+ "li ke",
1291
+ "u st",
1292
+ "es s",
1293
+ "b o",
1294
+ "o k",
1295
+ "u l",
1296
+ "in d",
1297
+ "e x",
1298
+ "c om",
1299
+ "s ome",
1300
+ "the re",
1301
+ "er s",
1302
+ "c o",
1303
+ "re s",
1304
+ "m an",
1305
+ "ar d",
1306
+ "p l",
1307
+ "w or",
1308
+ "w ay",
1309
+ "ti on",
1310
+ "f o",
1311
+ "c a",
1312
+ "w ere",
1313
+ "b y",
1314
+ "at e",
1315
+ "p ro",
1316
+ "t ed",
1317
+ "oun d",
1318
+ "ow n",
1319
+ "w ould",
1320
+ "t s",
1321
+ "wh at",
1322
+ "q u",
1323
+ "al ly",
1324
+ "i ght",
1325
+ "c k",
1326
+ "g r",
1327
+ "wh en",
1328
+ "v en",
1329
+ "c an",
1330
+ "ou gh",
1331
+ "in e",
1332
+ "en d",
1333
+ "p er",
1334
+ "ou s",
1335
+ "o d",
1336
+ "id e",
1337
+ "k now",
1338
+ "t y",
1339
+ "ver y",
1340
+ "s i",
1341
+ "a k",
1342
+ "wh o",
1343
+ "ab out",
1344
+ "i ll",
1345
+ "the m",
1346
+ "es t",
1347
+ "re d",
1348
+ "y e",
1349
+ "c ould",
1350
+ "on g",
1351
+ "you r",
1352
+ "the ir",
1353
+ "e m",
1354
+ "j ust",
1355
+ "o ther",
1356
+ "in to",
1357
+ "an y",
1358
+ "wh i",
1359
+ "u m",
1360
+ "t w",
1361
+ "as t",
1362
+ "d er",
1363
+ "d id",
1364
+ "i e",
1365
+ "be en",
1366
+ "ac e",
1367
+ "in k",
1368
+ "it y",
1369
+ "b ack",
1370
+ "t ing",
1371
+ "b r",
1372
+ "mo re",
1373
+ "a ke",
1374
+ "p p",
1375
+ "the n",
1376
+ "s p",
1377
+ "e l",
1378
+ "u se",
1379
+ "b l",
1380
+ "sa id",
1381
+ "o ver",
1382
+ "ge t",
1383
+ "ɑ ː",
1384
+ "i ː",
1385
+ "u ː",
1386
+ "ɜ ː",
1387
+ "ɔ ː",
1388
+ "o ː",
1389
+ "e ɪ",
1390
+ "o ʊ",
1391
+ "a ɪ",
1392
+ "a ʊ",
1393
+ "ɔ ɪ",
1394
+ "d ʒ",
1395
+ "t ʃ",
1396
+ "ɪ ŋ",
1397
+ "ᵻ d",
1398
+ "ˈ iː",
1399
+ "ˌ iː",
1400
+ "ˈ ɪ",
1401
+ "ˌ ɪ",
1402
+ "ˈ eɪ",
1403
+ "ˌ eɪ",
1404
+ "ˈ ɛ",
1405
+ "ˌ ɛ",
1406
+ "ˈ æ",
1407
+ "ˌ æ",
1408
+ "ˈ ɑː",
1409
+ "ˌ ɑː",
1410
+ "ˈ ɔː",
1411
+ "ˌ ɔː",
1412
+ "oː ɹ",
1413
+ "ˈ oːɹ",
1414
+ "ˌ oːɹ",
1415
+ "ˈ oʊ",
1416
+ "ˌ oʊ",
1417
+ "ˈ ʊ",
1418
+ "ˌ ʊ",
1419
+ "ˈ uː",
1420
+ "ˌ uː",
1421
+ "ˈ ɜː",
1422
+ "ˌ ɜː",
1423
+ "ˈ ʌ",
1424
+ "ˌ ʌ",
1425
+ "ˈ aɪ",
1426
+ "ˌ aɪ",
1427
+ "ˈ aʊ",
1428
+ "ˌ aʊ",
1429
+ "ˈ ɔɪ",
1430
+ "ˌ ɔɪ",
1431
+ "ˈ ɚ",
1432
+ "ˌ ɐ"
1433
+ ]
1434
+ }
1435
+ }
trainer_state.json ADDED
@@ -0,0 +1,1923 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 41.328125,
5
+ "eval_steps": 500,
6
+ "global_step": 2645,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0,
13
+ "eval_runtime": 2.9591,
14
+ "eval_samples_per_second": 0.338,
15
+ "eval_steps_per_second": 0.338,
16
+ "step": 0
17
+ },
18
+ {
19
+ "epoch": 0.15625,
20
+ "grad_norm": 8.098007202148438,
21
+ "learning_rate": 4.000000000000001e-06,
22
+ "loss": 8.5142,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.3125,
27
+ "grad_norm": 6.856651306152344,
28
+ "learning_rate": 9e-06,
29
+ "loss": 7.9708,
30
+ "step": 20
31
+ },
32
+ {
33
+ "epoch": 0.46875,
34
+ "grad_norm": 5.80125093460083,
35
+ "learning_rate": 1.4000000000000001e-05,
36
+ "loss": 8.01,
37
+ "step": 30
38
+ },
39
+ {
40
+ "epoch": 0.625,
41
+ "grad_norm": 5.687861442565918,
42
+ "learning_rate": 1.9e-05,
43
+ "loss": 7.6212,
44
+ "step": 40
45
+ },
46
+ {
47
+ "epoch": 0.78125,
48
+ "grad_norm": 6.117136478424072,
49
+ "learning_rate": 2.4e-05,
50
+ "loss": 7.3311,
51
+ "step": 50
52
+ },
53
+ {
54
+ "epoch": 0.9375,
55
+ "grad_norm": 5.861077785491943,
56
+ "learning_rate": 2.9e-05,
57
+ "loss": 7.5553,
58
+ "step": 60
59
+ },
60
+ {
61
+ "epoch": 1.09375,
62
+ "grad_norm": 6.021880149841309,
63
+ "learning_rate": 3.4000000000000007e-05,
64
+ "loss": 7.05,
65
+ "step": 70
66
+ },
67
+ {
68
+ "epoch": 1.25,
69
+ "grad_norm": 7.620683670043945,
70
+ "learning_rate": 3.9000000000000006e-05,
71
+ "loss": 6.7186,
72
+ "step": 80
73
+ },
74
+ {
75
+ "epoch": 1.40625,
76
+ "grad_norm": 7.098018169403076,
77
+ "learning_rate": 4.4000000000000006e-05,
78
+ "loss": 6.5668,
79
+ "step": 90
80
+ },
81
+ {
82
+ "epoch": 1.5625,
83
+ "grad_norm": 7.157684326171875,
84
+ "learning_rate": 4.9e-05,
85
+ "loss": 6.6148,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 1.71875,
90
+ "grad_norm": 7.009696006774902,
91
+ "learning_rate": 4.9936507936507936e-05,
92
+ "loss": 6.5017,
93
+ "step": 110
94
+ },
95
+ {
96
+ "epoch": 1.875,
97
+ "grad_norm": 7.6622467041015625,
98
+ "learning_rate": 4.985714285714286e-05,
99
+ "loss": 6.53,
100
+ "step": 120
101
+ },
102
+ {
103
+ "epoch": 2.03125,
104
+ "grad_norm": 8.221333503723145,
105
+ "learning_rate": 4.977777777777778e-05,
106
+ "loss": 6.306,
107
+ "step": 130
108
+ },
109
+ {
110
+ "epoch": 2.1875,
111
+ "grad_norm": 8.832132339477539,
112
+ "learning_rate": 4.96984126984127e-05,
113
+ "loss": 4.8787,
114
+ "step": 140
115
+ },
116
+ {
117
+ "epoch": 2.34375,
118
+ "grad_norm": 9.245682716369629,
119
+ "learning_rate": 4.961904761904762e-05,
120
+ "loss": 4.7796,
121
+ "step": 150
122
+ },
123
+ {
124
+ "epoch": 2.5,
125
+ "grad_norm": 9.249746322631836,
126
+ "learning_rate": 4.953968253968254e-05,
127
+ "loss": 4.79,
128
+ "step": 160
129
+ },
130
+ {
131
+ "epoch": 2.65625,
132
+ "grad_norm": 8.098647117614746,
133
+ "learning_rate": 4.9460317460317465e-05,
134
+ "loss": 4.8157,
135
+ "step": 170
136
+ },
137
+ {
138
+ "epoch": 2.8125,
139
+ "grad_norm": 9.594801902770996,
140
+ "learning_rate": 4.9380952380952386e-05,
141
+ "loss": 4.5135,
142
+ "step": 180
143
+ },
144
+ {
145
+ "epoch": 2.96875,
146
+ "grad_norm": 9.500739097595215,
147
+ "learning_rate": 4.930158730158731e-05,
148
+ "loss": 4.9346,
149
+ "step": 190
150
+ },
151
+ {
152
+ "epoch": 3.125,
153
+ "grad_norm": 10.759628295898438,
154
+ "learning_rate": 4.922222222222222e-05,
155
+ "loss": 3.4195,
156
+ "step": 200
157
+ },
158
+ {
159
+ "epoch": 3.28125,
160
+ "grad_norm": 12.300085067749023,
161
+ "learning_rate": 4.9142857142857144e-05,
162
+ "loss": 3.136,
163
+ "step": 210
164
+ },
165
+ {
166
+ "epoch": 3.4375,
167
+ "grad_norm": 9.825932502746582,
168
+ "learning_rate": 4.9063492063492065e-05,
169
+ "loss": 3.0981,
170
+ "step": 220
171
+ },
172
+ {
173
+ "epoch": 3.59375,
174
+ "grad_norm": 10.764175415039062,
175
+ "learning_rate": 4.898412698412699e-05,
176
+ "loss": 3.0372,
177
+ "step": 230
178
+ },
179
+ {
180
+ "epoch": 3.75,
181
+ "grad_norm": 10.330010414123535,
182
+ "learning_rate": 4.890476190476191e-05,
183
+ "loss": 3.0934,
184
+ "step": 240
185
+ },
186
+ {
187
+ "epoch": 3.90625,
188
+ "grad_norm": 10.405282020568848,
189
+ "learning_rate": 4.882539682539683e-05,
190
+ "loss": 3.135,
191
+ "step": 250
192
+ },
193
+ {
194
+ "epoch": 4.0625,
195
+ "grad_norm": 9.842012405395508,
196
+ "learning_rate": 4.874603174603175e-05,
197
+ "loss": 2.7457,
198
+ "step": 260
199
+ },
200
+ {
201
+ "epoch": 4.21875,
202
+ "grad_norm": 10.659170150756836,
203
+ "learning_rate": 4.866666666666667e-05,
204
+ "loss": 1.981,
205
+ "step": 270
206
+ },
207
+ {
208
+ "epoch": 4.375,
209
+ "grad_norm": 9.820462226867676,
210
+ "learning_rate": 4.858730158730159e-05,
211
+ "loss": 2.0881,
212
+ "step": 280
213
+ },
214
+ {
215
+ "epoch": 4.53125,
216
+ "grad_norm": 9.645284652709961,
217
+ "learning_rate": 4.850793650793651e-05,
218
+ "loss": 1.9438,
219
+ "step": 290
220
+ },
221
+ {
222
+ "epoch": 4.6875,
223
+ "grad_norm": 9.881282806396484,
224
+ "learning_rate": 4.842857142857143e-05,
225
+ "loss": 1.8556,
226
+ "step": 300
227
+ },
228
+ {
229
+ "epoch": 4.84375,
230
+ "grad_norm": 10.5478515625,
231
+ "learning_rate": 4.834920634920635e-05,
232
+ "loss": 2.0494,
233
+ "step": 310
234
+ },
235
+ {
236
+ "epoch": 5.0,
237
+ "grad_norm": 19.77553939819336,
238
+ "learning_rate": 4.8269841269841274e-05,
239
+ "loss": 2.0695,
240
+ "step": 320
241
+ },
242
+ {
243
+ "epoch": 5.15625,
244
+ "grad_norm": 8.124504089355469,
245
+ "learning_rate": 4.819047619047619e-05,
246
+ "loss": 1.1424,
247
+ "step": 330
248
+ },
249
+ {
250
+ "epoch": 5.3125,
251
+ "grad_norm": 9.63291072845459,
252
+ "learning_rate": 4.811111111111111e-05,
253
+ "loss": 1.1688,
254
+ "step": 340
255
+ },
256
+ {
257
+ "epoch": 5.46875,
258
+ "grad_norm": 8.620952606201172,
259
+ "learning_rate": 4.803174603174603e-05,
260
+ "loss": 1.2346,
261
+ "step": 350
262
+ },
263
+ {
264
+ "epoch": 5.625,
265
+ "grad_norm": 8.060315132141113,
266
+ "learning_rate": 4.795238095238095e-05,
267
+ "loss": 1.176,
268
+ "step": 360
269
+ },
270
+ {
271
+ "epoch": 5.78125,
272
+ "grad_norm": 7.583358287811279,
273
+ "learning_rate": 4.7873015873015874e-05,
274
+ "loss": 1.2118,
275
+ "step": 370
276
+ },
277
+ {
278
+ "epoch": 5.9375,
279
+ "grad_norm": 10.535112380981445,
280
+ "learning_rate": 4.7793650793650796e-05,
281
+ "loss": 1.2446,
282
+ "step": 380
283
+ },
284
+ {
285
+ "epoch": 6.09375,
286
+ "grad_norm": 7.170854568481445,
287
+ "learning_rate": 4.771428571428572e-05,
288
+ "loss": 1.0445,
289
+ "step": 390
290
+ },
291
+ {
292
+ "epoch": 6.25,
293
+ "grad_norm": 6.0907392501831055,
294
+ "learning_rate": 4.763492063492064e-05,
295
+ "loss": 0.7688,
296
+ "step": 400
297
+ },
298
+ {
299
+ "epoch": 6.40625,
300
+ "grad_norm": 7.953549385070801,
301
+ "learning_rate": 4.755555555555556e-05,
302
+ "loss": 0.7333,
303
+ "step": 410
304
+ },
305
+ {
306
+ "epoch": 6.5625,
307
+ "grad_norm": 10.276406288146973,
308
+ "learning_rate": 4.747619047619048e-05,
309
+ "loss": 0.8645,
310
+ "step": 420
311
+ },
312
+ {
313
+ "epoch": 6.71875,
314
+ "grad_norm": 7.596552848815918,
315
+ "learning_rate": 4.73968253968254e-05,
316
+ "loss": 0.7799,
317
+ "step": 430
318
+ },
319
+ {
320
+ "epoch": 6.875,
321
+ "grad_norm": 6.478920936584473,
322
+ "learning_rate": 4.7317460317460325e-05,
323
+ "loss": 0.8262,
324
+ "step": 440
325
+ },
326
+ {
327
+ "epoch": 7.03125,
328
+ "grad_norm": 4.837390422821045,
329
+ "learning_rate": 4.723809523809524e-05,
330
+ "loss": 0.7218,
331
+ "step": 450
332
+ },
333
+ {
334
+ "epoch": 7.1875,
335
+ "grad_norm": 5.8777289390563965,
336
+ "learning_rate": 4.715873015873016e-05,
337
+ "loss": 0.5238,
338
+ "step": 460
339
+ },
340
+ {
341
+ "epoch": 7.34375,
342
+ "grad_norm": 4.988452434539795,
343
+ "learning_rate": 4.707936507936508e-05,
344
+ "loss": 0.5285,
345
+ "step": 470
346
+ },
347
+ {
348
+ "epoch": 7.5,
349
+ "grad_norm": 5.605051517486572,
350
+ "learning_rate": 4.7e-05,
351
+ "loss": 0.5368,
352
+ "step": 480
353
+ },
354
+ {
355
+ "epoch": 7.65625,
356
+ "grad_norm": 7.234042644500732,
357
+ "learning_rate": 4.692063492063492e-05,
358
+ "loss": 0.5803,
359
+ "step": 490
360
+ },
361
+ {
362
+ "epoch": 7.8125,
363
+ "grad_norm": 6.273004055023193,
364
+ "learning_rate": 4.684126984126984e-05,
365
+ "loss": 0.519,
366
+ "step": 500
367
+ },
368
+ {
369
+ "epoch": 7.8125,
370
+ "eval_runtime": 2.3062,
371
+ "eval_samples_per_second": 0.434,
372
+ "eval_steps_per_second": 0.434,
373
+ "step": 500
374
+ },
375
+ {
376
+ "epoch": 7.96875,
377
+ "grad_norm": 4.803182125091553,
378
+ "learning_rate": 4.676190476190476e-05,
379
+ "loss": 0.5611,
380
+ "step": 510
381
+ },
382
+ {
383
+ "epoch": 8.125,
384
+ "grad_norm": 4.977607727050781,
385
+ "learning_rate": 4.668253968253968e-05,
386
+ "loss": 0.3616,
387
+ "step": 520
388
+ },
389
+ {
390
+ "epoch": 8.28125,
391
+ "grad_norm": 4.4240336418151855,
392
+ "learning_rate": 4.6603174603174605e-05,
393
+ "loss": 0.4303,
394
+ "step": 530
395
+ },
396
+ {
397
+ "epoch": 8.4375,
398
+ "grad_norm": 5.406126022338867,
399
+ "learning_rate": 4.6523809523809526e-05,
400
+ "loss": 0.384,
401
+ "step": 540
402
+ },
403
+ {
404
+ "epoch": 8.59375,
405
+ "grad_norm": 4.34033203125,
406
+ "learning_rate": 4.644444444444445e-05,
407
+ "loss": 0.3784,
408
+ "step": 550
409
+ },
410
+ {
411
+ "epoch": 8.75,
412
+ "grad_norm": 5.256556034088135,
413
+ "learning_rate": 4.636507936507937e-05,
414
+ "loss": 0.4345,
415
+ "step": 560
416
+ },
417
+ {
418
+ "epoch": 8.90625,
419
+ "grad_norm": 5.054710388183594,
420
+ "learning_rate": 4.628571428571429e-05,
421
+ "loss": 0.4257,
422
+ "step": 570
423
+ },
424
+ {
425
+ "epoch": 9.0625,
426
+ "grad_norm": 5.220774173736572,
427
+ "learning_rate": 4.6206349206349205e-05,
428
+ "loss": 0.3146,
429
+ "step": 580
430
+ },
431
+ {
432
+ "epoch": 9.21875,
433
+ "grad_norm": 4.420045375823975,
434
+ "learning_rate": 4.612698412698413e-05,
435
+ "loss": 0.298,
436
+ "step": 590
437
+ },
438
+ {
439
+ "epoch": 9.375,
440
+ "grad_norm": 4.474491596221924,
441
+ "learning_rate": 4.604761904761905e-05,
442
+ "loss": 0.3023,
443
+ "step": 600
444
+ },
445
+ {
446
+ "epoch": 9.53125,
447
+ "grad_norm": 4.131107807159424,
448
+ "learning_rate": 4.596825396825397e-05,
449
+ "loss": 0.2846,
450
+ "step": 610
451
+ },
452
+ {
453
+ "epoch": 9.6875,
454
+ "grad_norm": 4.535623550415039,
455
+ "learning_rate": 4.588888888888889e-05,
456
+ "loss": 0.3244,
457
+ "step": 620
458
+ },
459
+ {
460
+ "epoch": 9.84375,
461
+ "grad_norm": 4.365594387054443,
462
+ "learning_rate": 4.580952380952381e-05,
463
+ "loss": 0.2925,
464
+ "step": 630
465
+ },
466
+ {
467
+ "epoch": 10.0,
468
+ "grad_norm": 7.354922294616699,
469
+ "learning_rate": 4.5730158730158734e-05,
470
+ "loss": 0.3234,
471
+ "step": 640
472
+ },
473
+ {
474
+ "epoch": 10.15625,
475
+ "grad_norm": 3.916835308074951,
476
+ "learning_rate": 4.5650793650793656e-05,
477
+ "loss": 0.236,
478
+ "step": 650
479
+ },
480
+ {
481
+ "epoch": 10.3125,
482
+ "grad_norm": 4.322419166564941,
483
+ "learning_rate": 4.557142857142858e-05,
484
+ "loss": 0.2353,
485
+ "step": 660
486
+ },
487
+ {
488
+ "epoch": 10.46875,
489
+ "grad_norm": 5.4846038818359375,
490
+ "learning_rate": 4.54920634920635e-05,
491
+ "loss": 0.2422,
492
+ "step": 670
493
+ },
494
+ {
495
+ "epoch": 10.625,
496
+ "grad_norm": 4.341657638549805,
497
+ "learning_rate": 4.5412698412698414e-05,
498
+ "loss": 0.2592,
499
+ "step": 680
500
+ },
501
+ {
502
+ "epoch": 10.78125,
503
+ "grad_norm": 4.786070346832275,
504
+ "learning_rate": 4.5333333333333335e-05,
505
+ "loss": 0.2501,
506
+ "step": 690
507
+ },
508
+ {
509
+ "epoch": 10.9375,
510
+ "grad_norm": 3.9264745712280273,
511
+ "learning_rate": 4.525396825396826e-05,
512
+ "loss": 0.2683,
513
+ "step": 700
514
+ },
515
+ {
516
+ "epoch": 11.09375,
517
+ "grad_norm": 3.851856231689453,
518
+ "learning_rate": 4.517460317460318e-05,
519
+ "loss": 0.1978,
520
+ "step": 710
521
+ },
522
+ {
523
+ "epoch": 11.25,
524
+ "grad_norm": 4.643581390380859,
525
+ "learning_rate": 4.509523809523809e-05,
526
+ "loss": 0.1841,
527
+ "step": 720
528
+ },
529
+ {
530
+ "epoch": 11.40625,
531
+ "grad_norm": 4.105888843536377,
532
+ "learning_rate": 4.5015873015873014e-05,
533
+ "loss": 0.1976,
534
+ "step": 730
535
+ },
536
+ {
537
+ "epoch": 11.5625,
538
+ "grad_norm": 3.809528350830078,
539
+ "learning_rate": 4.4936507936507936e-05,
540
+ "loss": 0.1896,
541
+ "step": 740
542
+ },
543
+ {
544
+ "epoch": 11.71875,
545
+ "grad_norm": 4.457248687744141,
546
+ "learning_rate": 4.485714285714286e-05,
547
+ "loss": 0.2125,
548
+ "step": 750
549
+ },
550
+ {
551
+ "epoch": 11.875,
552
+ "grad_norm": 3.7699806690216064,
553
+ "learning_rate": 4.477777777777778e-05,
554
+ "loss": 0.2212,
555
+ "step": 760
556
+ },
557
+ {
558
+ "epoch": 12.03125,
559
+ "grad_norm": 4.643237590789795,
560
+ "learning_rate": 4.46984126984127e-05,
561
+ "loss": 0.2235,
562
+ "step": 770
563
+ },
564
+ {
565
+ "epoch": 12.1875,
566
+ "grad_norm": 4.064443588256836,
567
+ "learning_rate": 4.461904761904762e-05,
568
+ "loss": 0.1503,
569
+ "step": 780
570
+ },
571
+ {
572
+ "epoch": 12.34375,
573
+ "grad_norm": 4.605109691619873,
574
+ "learning_rate": 4.4539682539682543e-05,
575
+ "loss": 0.172,
576
+ "step": 790
577
+ },
578
+ {
579
+ "epoch": 12.5,
580
+ "grad_norm": 3.8005576133728027,
581
+ "learning_rate": 4.4460317460317465e-05,
582
+ "loss": 0.1713,
583
+ "step": 800
584
+ },
585
+ {
586
+ "epoch": 12.65625,
587
+ "grad_norm": 4.829875469207764,
588
+ "learning_rate": 4.4380952380952386e-05,
589
+ "loss": 0.1783,
590
+ "step": 810
591
+ },
592
+ {
593
+ "epoch": 12.8125,
594
+ "grad_norm": 4.428802490234375,
595
+ "learning_rate": 4.430158730158731e-05,
596
+ "loss": 0.1742,
597
+ "step": 820
598
+ },
599
+ {
600
+ "epoch": 12.96875,
601
+ "grad_norm": 5.068906307220459,
602
+ "learning_rate": 4.422222222222222e-05,
603
+ "loss": 0.1797,
604
+ "step": 830
605
+ },
606
+ {
607
+ "epoch": 13.125,
608
+ "grad_norm": 3.3268795013427734,
609
+ "learning_rate": 4.4142857142857144e-05,
610
+ "loss": 0.1389,
611
+ "step": 840
612
+ },
613
+ {
614
+ "epoch": 13.28125,
615
+ "grad_norm": 3.312582015991211,
616
+ "learning_rate": 4.4063492063492066e-05,
617
+ "loss": 0.1191,
618
+ "step": 850
619
+ },
620
+ {
621
+ "epoch": 13.4375,
622
+ "grad_norm": 3.7299697399139404,
623
+ "learning_rate": 4.398412698412699e-05,
624
+ "loss": 0.1432,
625
+ "step": 860
626
+ },
627
+ {
628
+ "epoch": 13.59375,
629
+ "grad_norm": 4.627827167510986,
630
+ "learning_rate": 4.39047619047619e-05,
631
+ "loss": 0.1672,
632
+ "step": 870
633
+ },
634
+ {
635
+ "epoch": 13.75,
636
+ "grad_norm": 4.645421981811523,
637
+ "learning_rate": 4.3825396825396823e-05,
638
+ "loss": 0.1503,
639
+ "step": 880
640
+ },
641
+ {
642
+ "epoch": 13.90625,
643
+ "grad_norm": 5.321810245513916,
644
+ "learning_rate": 4.3746031746031745e-05,
645
+ "loss": 0.1486,
646
+ "step": 890
647
+ },
648
+ {
649
+ "epoch": 14.0625,
650
+ "grad_norm": 2.487105369567871,
651
+ "learning_rate": 4.3666666666666666e-05,
652
+ "loss": 0.1193,
653
+ "step": 900
654
+ },
655
+ {
656
+ "epoch": 14.21875,
657
+ "grad_norm": 4.098151683807373,
658
+ "learning_rate": 4.358730158730159e-05,
659
+ "loss": 0.1182,
660
+ "step": 910
661
+ },
662
+ {
663
+ "epoch": 14.375,
664
+ "grad_norm": 3.2538163661956787,
665
+ "learning_rate": 4.350793650793651e-05,
666
+ "loss": 0.1145,
667
+ "step": 920
668
+ },
669
+ {
670
+ "epoch": 14.53125,
671
+ "grad_norm": 3.7881181240081787,
672
+ "learning_rate": 4.342857142857143e-05,
673
+ "loss": 0.1255,
674
+ "step": 930
675
+ },
676
+ {
677
+ "epoch": 14.6875,
678
+ "grad_norm": 2.861689567565918,
679
+ "learning_rate": 4.334920634920635e-05,
680
+ "loss": 0.144,
681
+ "step": 940
682
+ },
683
+ {
684
+ "epoch": 14.84375,
685
+ "grad_norm": 3.1621310710906982,
686
+ "learning_rate": 4.3269841269841274e-05,
687
+ "loss": 0.1293,
688
+ "step": 950
689
+ },
690
+ {
691
+ "epoch": 15.0,
692
+ "grad_norm": 11.439355850219727,
693
+ "learning_rate": 4.3190476190476195e-05,
694
+ "loss": 0.1553,
695
+ "step": 960
696
+ },
697
+ {
698
+ "epoch": 15.15625,
699
+ "grad_norm": 3.6791841983795166,
700
+ "learning_rate": 4.311111111111111e-05,
701
+ "loss": 0.0949,
702
+ "step": 970
703
+ },
704
+ {
705
+ "epoch": 15.3125,
706
+ "grad_norm": 2.71702241897583,
707
+ "learning_rate": 4.303174603174603e-05,
708
+ "loss": 0.1125,
709
+ "step": 980
710
+ },
711
+ {
712
+ "epoch": 15.46875,
713
+ "grad_norm": 2.713571786880493,
714
+ "learning_rate": 4.295238095238095e-05,
715
+ "loss": 0.137,
716
+ "step": 990
717
+ },
718
+ {
719
+ "epoch": 15.625,
720
+ "grad_norm": 2.975768804550171,
721
+ "learning_rate": 4.2873015873015875e-05,
722
+ "loss": 0.108,
723
+ "step": 1000
724
+ },
725
+ {
726
+ "epoch": 15.625,
727
+ "eval_runtime": 2.2622,
728
+ "eval_samples_per_second": 0.442,
729
+ "eval_steps_per_second": 0.442,
730
+ "step": 1000
731
+ },
732
+ {
733
+ "epoch": 15.78125,
734
+ "grad_norm": 3.5744853019714355,
735
+ "learning_rate": 4.2793650793650796e-05,
736
+ "loss": 0.1207,
737
+ "step": 1010
738
+ },
739
+ {
740
+ "epoch": 15.9375,
741
+ "grad_norm": 1.8845595121383667,
742
+ "learning_rate": 4.271428571428572e-05,
743
+ "loss": 0.1013,
744
+ "step": 1020
745
+ },
746
+ {
747
+ "epoch": 16.09375,
748
+ "grad_norm": 4.227961540222168,
749
+ "learning_rate": 4.263492063492064e-05,
750
+ "loss": 0.112,
751
+ "step": 1030
752
+ },
753
+ {
754
+ "epoch": 16.25,
755
+ "grad_norm": 3.289071798324585,
756
+ "learning_rate": 4.255555555555556e-05,
757
+ "loss": 0.1054,
758
+ "step": 1040
759
+ },
760
+ {
761
+ "epoch": 16.40625,
762
+ "grad_norm": 1.9135278463363647,
763
+ "learning_rate": 4.247619047619048e-05,
764
+ "loss": 0.0933,
765
+ "step": 1050
766
+ },
767
+ {
768
+ "epoch": 16.5625,
769
+ "grad_norm": 2.586151599884033,
770
+ "learning_rate": 4.2396825396825404e-05,
771
+ "loss": 0.1205,
772
+ "step": 1060
773
+ },
774
+ {
775
+ "epoch": 16.71875,
776
+ "grad_norm": 3.5681819915771484,
777
+ "learning_rate": 4.231746031746032e-05,
778
+ "loss": 0.1015,
779
+ "step": 1070
780
+ },
781
+ {
782
+ "epoch": 16.875,
783
+ "grad_norm": 3.7675039768218994,
784
+ "learning_rate": 4.223809523809524e-05,
785
+ "loss": 0.128,
786
+ "step": 1080
787
+ },
788
+ {
789
+ "epoch": 17.03125,
790
+ "grad_norm": 2.631277322769165,
791
+ "learning_rate": 4.215873015873016e-05,
792
+ "loss": 0.107,
793
+ "step": 1090
794
+ },
795
+ {
796
+ "epoch": 17.1875,
797
+ "grad_norm": 3.59251070022583,
798
+ "learning_rate": 4.2079365079365076e-05,
799
+ "loss": 0.0918,
800
+ "step": 1100
801
+ },
802
+ {
803
+ "epoch": 17.34375,
804
+ "grad_norm": 3.785374641418457,
805
+ "learning_rate": 4.2e-05,
806
+ "loss": 0.1031,
807
+ "step": 1110
808
+ },
809
+ {
810
+ "epoch": 17.5,
811
+ "grad_norm": 3.6201488971710205,
812
+ "learning_rate": 4.192063492063492e-05,
813
+ "loss": 0.0965,
814
+ "step": 1120
815
+ },
816
+ {
817
+ "epoch": 17.65625,
818
+ "grad_norm": 3.608516216278076,
819
+ "learning_rate": 4.184126984126984e-05,
820
+ "loss": 0.1046,
821
+ "step": 1130
822
+ },
823
+ {
824
+ "epoch": 17.8125,
825
+ "grad_norm": 3.3631608486175537,
826
+ "learning_rate": 4.176190476190476e-05,
827
+ "loss": 0.1047,
828
+ "step": 1140
829
+ },
830
+ {
831
+ "epoch": 17.96875,
832
+ "grad_norm": 2.2599599361419678,
833
+ "learning_rate": 4.1682539682539684e-05,
834
+ "loss": 0.0976,
835
+ "step": 1150
836
+ },
837
+ {
838
+ "epoch": 18.125,
839
+ "grad_norm": 3.3925111293792725,
840
+ "learning_rate": 4.1603174603174605e-05,
841
+ "loss": 0.081,
842
+ "step": 1160
843
+ },
844
+ {
845
+ "epoch": 18.28125,
846
+ "grad_norm": 2.5443062782287598,
847
+ "learning_rate": 4.152380952380953e-05,
848
+ "loss": 0.0917,
849
+ "step": 1170
850
+ },
851
+ {
852
+ "epoch": 18.4375,
853
+ "grad_norm": 4.180214881896973,
854
+ "learning_rate": 4.144444444444445e-05,
855
+ "loss": 0.0913,
856
+ "step": 1180
857
+ },
858
+ {
859
+ "epoch": 18.59375,
860
+ "grad_norm": 2.3229408264160156,
861
+ "learning_rate": 4.136507936507937e-05,
862
+ "loss": 0.081,
863
+ "step": 1190
864
+ },
865
+ {
866
+ "epoch": 18.75,
867
+ "grad_norm": 3.885908603668213,
868
+ "learning_rate": 4.128571428571429e-05,
869
+ "loss": 0.1078,
870
+ "step": 1200
871
+ },
872
+ {
873
+ "epoch": 18.90625,
874
+ "grad_norm": 3.1222493648529053,
875
+ "learning_rate": 4.120634920634921e-05,
876
+ "loss": 0.1177,
877
+ "step": 1210
878
+ },
879
+ {
880
+ "epoch": 19.0625,
881
+ "grad_norm": 3.3118879795074463,
882
+ "learning_rate": 4.112698412698413e-05,
883
+ "loss": 0.0775,
884
+ "step": 1220
885
+ },
886
+ {
887
+ "epoch": 19.21875,
888
+ "grad_norm": 1.8756282329559326,
889
+ "learning_rate": 4.104761904761905e-05,
890
+ "loss": 0.0906,
891
+ "step": 1230
892
+ },
893
+ {
894
+ "epoch": 19.375,
895
+ "grad_norm": 3.6675071716308594,
896
+ "learning_rate": 4.096825396825397e-05,
897
+ "loss": 0.0938,
898
+ "step": 1240
899
+ },
900
+ {
901
+ "epoch": 19.53125,
902
+ "grad_norm": 2.8696744441986084,
903
+ "learning_rate": 4.088888888888889e-05,
904
+ "loss": 0.0821,
905
+ "step": 1250
906
+ },
907
+ {
908
+ "epoch": 19.6875,
909
+ "grad_norm": 3.246438980102539,
910
+ "learning_rate": 4.0809523809523813e-05,
911
+ "loss": 0.1216,
912
+ "step": 1260
913
+ },
914
+ {
915
+ "epoch": 19.84375,
916
+ "grad_norm": 2.579602003097534,
917
+ "learning_rate": 4.073015873015873e-05,
918
+ "loss": 0.0705,
919
+ "step": 1270
920
+ },
921
+ {
922
+ "epoch": 20.0,
923
+ "grad_norm": 14.062989234924316,
924
+ "learning_rate": 4.065079365079365e-05,
925
+ "loss": 0.1125,
926
+ "step": 1280
927
+ },
928
+ {
929
+ "epoch": 20.15625,
930
+ "grad_norm": 2.5907585620880127,
931
+ "learning_rate": 4.057142857142857e-05,
932
+ "loss": 0.0764,
933
+ "step": 1290
934
+ },
935
+ {
936
+ "epoch": 20.3125,
937
+ "grad_norm": 4.850607872009277,
938
+ "learning_rate": 4.049206349206349e-05,
939
+ "loss": 0.0882,
940
+ "step": 1300
941
+ },
942
+ {
943
+ "epoch": 20.46875,
944
+ "grad_norm": 2.3619115352630615,
945
+ "learning_rate": 4.0412698412698414e-05,
946
+ "loss": 0.0833,
947
+ "step": 1310
948
+ },
949
+ {
950
+ "epoch": 20.625,
951
+ "grad_norm": 3.1803815364837646,
952
+ "learning_rate": 4.0333333333333336e-05,
953
+ "loss": 0.0894,
954
+ "step": 1320
955
+ },
956
+ {
957
+ "epoch": 20.78125,
958
+ "grad_norm": 15.570907592773438,
959
+ "learning_rate": 4.025396825396826e-05,
960
+ "loss": 0.2225,
961
+ "step": 1330
962
+ },
963
+ {
964
+ "epoch": 20.9375,
965
+ "grad_norm": 2.721440076828003,
966
+ "learning_rate": 4.018253968253968e-05,
967
+ "loss": 0.1233,
968
+ "step": 1340
969
+ },
970
+ {
971
+ "epoch": 21.09375,
972
+ "grad_norm": 1.798134446144104,
973
+ "learning_rate": 4.01031746031746e-05,
974
+ "loss": 0.067,
975
+ "step": 1350
976
+ },
977
+ {
978
+ "epoch": 21.25,
979
+ "grad_norm": 3.387782335281372,
980
+ "learning_rate": 4.0023809523809524e-05,
981
+ "loss": 0.0692,
982
+ "step": 1360
983
+ },
984
+ {
985
+ "epoch": 21.40625,
986
+ "grad_norm": 2.723196029663086,
987
+ "learning_rate": 3.9944444444444446e-05,
988
+ "loss": 0.0683,
989
+ "step": 1370
990
+ },
991
+ {
992
+ "epoch": 21.5625,
993
+ "grad_norm": 3.666444778442383,
994
+ "learning_rate": 3.986507936507937e-05,
995
+ "loss": 0.0883,
996
+ "step": 1380
997
+ },
998
+ {
999
+ "epoch": 21.71875,
1000
+ "grad_norm": 1.9571526050567627,
1001
+ "learning_rate": 3.978571428571429e-05,
1002
+ "loss": 0.0802,
1003
+ "step": 1390
1004
+ },
1005
+ {
1006
+ "epoch": 21.875,
1007
+ "grad_norm": 2.5271904468536377,
1008
+ "learning_rate": 3.970634920634921e-05,
1009
+ "loss": 0.0737,
1010
+ "step": 1400
1011
+ },
1012
+ {
1013
+ "epoch": 22.03125,
1014
+ "grad_norm": 3.5426900386810303,
1015
+ "learning_rate": 3.962698412698413e-05,
1016
+ "loss": 0.0838,
1017
+ "step": 1410
1018
+ },
1019
+ {
1020
+ "epoch": 22.1875,
1021
+ "grad_norm": 1.7901580333709717,
1022
+ "learning_rate": 3.954761904761905e-05,
1023
+ "loss": 0.0671,
1024
+ "step": 1420
1025
+ },
1026
+ {
1027
+ "epoch": 22.34375,
1028
+ "grad_norm": 3.4260764122009277,
1029
+ "learning_rate": 3.946825396825397e-05,
1030
+ "loss": 0.0775,
1031
+ "step": 1430
1032
+ },
1033
+ {
1034
+ "epoch": 22.5,
1035
+ "grad_norm": 2.499107837677002,
1036
+ "learning_rate": 3.938888888888889e-05,
1037
+ "loss": 0.0826,
1038
+ "step": 1440
1039
+ },
1040
+ {
1041
+ "epoch": 22.65625,
1042
+ "grad_norm": 2.7331862449645996,
1043
+ "learning_rate": 3.930952380952381e-05,
1044
+ "loss": 0.0771,
1045
+ "step": 1450
1046
+ },
1047
+ {
1048
+ "epoch": 22.8125,
1049
+ "grad_norm": 3.2004685401916504,
1050
+ "learning_rate": 3.923015873015873e-05,
1051
+ "loss": 0.088,
1052
+ "step": 1460
1053
+ },
1054
+ {
1055
+ "epoch": 22.96875,
1056
+ "grad_norm": 3.175179958343506,
1057
+ "learning_rate": 3.9150793650793654e-05,
1058
+ "loss": 0.0861,
1059
+ "step": 1470
1060
+ },
1061
+ {
1062
+ "epoch": 23.125,
1063
+ "grad_norm": 2.032646417617798,
1064
+ "learning_rate": 3.9071428571428575e-05,
1065
+ "loss": 0.0682,
1066
+ "step": 1480
1067
+ },
1068
+ {
1069
+ "epoch": 23.28125,
1070
+ "grad_norm": 1.5635634660720825,
1071
+ "learning_rate": 3.89920634920635e-05,
1072
+ "loss": 0.0833,
1073
+ "step": 1490
1074
+ },
1075
+ {
1076
+ "epoch": 23.4375,
1077
+ "grad_norm": 1.8121321201324463,
1078
+ "learning_rate": 3.891269841269842e-05,
1079
+ "loss": 0.0965,
1080
+ "step": 1500
1081
+ },
1082
+ {
1083
+ "epoch": 23.4375,
1084
+ "eval_runtime": 2.0794,
1085
+ "eval_samples_per_second": 0.481,
1086
+ "eval_steps_per_second": 0.481,
1087
+ "step": 1500
1088
+ },
1089
+ {
1090
+ "epoch": 23.59375,
1091
+ "grad_norm": 1.8793394565582275,
1092
+ "learning_rate": 3.883333333333333e-05,
1093
+ "loss": 0.0887,
1094
+ "step": 1510
1095
+ },
1096
+ {
1097
+ "epoch": 23.75,
1098
+ "grad_norm": 2.1231632232666016,
1099
+ "learning_rate": 3.8753968253968255e-05,
1100
+ "loss": 0.0879,
1101
+ "step": 1520
1102
+ },
1103
+ {
1104
+ "epoch": 23.90625,
1105
+ "grad_norm": 3.5764803886413574,
1106
+ "learning_rate": 3.8674603174603176e-05,
1107
+ "loss": 0.0861,
1108
+ "step": 1530
1109
+ },
1110
+ {
1111
+ "epoch": 24.0625,
1112
+ "grad_norm": 2.105710029602051,
1113
+ "learning_rate": 3.85952380952381e-05,
1114
+ "loss": 0.07,
1115
+ "step": 1540
1116
+ },
1117
+ {
1118
+ "epoch": 24.21875,
1119
+ "grad_norm": 2.8722870349884033,
1120
+ "learning_rate": 3.851587301587302e-05,
1121
+ "loss": 0.076,
1122
+ "step": 1550
1123
+ },
1124
+ {
1125
+ "epoch": 24.375,
1126
+ "grad_norm": 2.6126277446746826,
1127
+ "learning_rate": 3.843650793650794e-05,
1128
+ "loss": 0.0983,
1129
+ "step": 1560
1130
+ },
1131
+ {
1132
+ "epoch": 24.53125,
1133
+ "grad_norm": 2.2635769844055176,
1134
+ "learning_rate": 3.8357142857142855e-05,
1135
+ "loss": 0.0864,
1136
+ "step": 1570
1137
+ },
1138
+ {
1139
+ "epoch": 24.6875,
1140
+ "grad_norm": 2.11098313331604,
1141
+ "learning_rate": 3.827777777777778e-05,
1142
+ "loss": 0.0923,
1143
+ "step": 1580
1144
+ },
1145
+ {
1146
+ "epoch": 24.84375,
1147
+ "grad_norm": 1.6163533926010132,
1148
+ "learning_rate": 3.81984126984127e-05,
1149
+ "loss": 0.0711,
1150
+ "step": 1590
1151
+ },
1152
+ {
1153
+ "epoch": 25.0,
1154
+ "grad_norm": 2.54638671875,
1155
+ "learning_rate": 3.811904761904762e-05,
1156
+ "loss": 0.0705,
1157
+ "step": 1600
1158
+ },
1159
+ {
1160
+ "epoch": 25.15625,
1161
+ "grad_norm": 2.592470407485962,
1162
+ "learning_rate": 3.803968253968254e-05,
1163
+ "loss": 0.0765,
1164
+ "step": 1610
1165
+ },
1166
+ {
1167
+ "epoch": 25.3125,
1168
+ "grad_norm": 2.1981208324432373,
1169
+ "learning_rate": 3.796031746031746e-05,
1170
+ "loss": 0.0856,
1171
+ "step": 1620
1172
+ },
1173
+ {
1174
+ "epoch": 25.46875,
1175
+ "grad_norm": 1.384098768234253,
1176
+ "learning_rate": 3.7880952380952384e-05,
1177
+ "loss": 0.0951,
1178
+ "step": 1630
1179
+ },
1180
+ {
1181
+ "epoch": 25.625,
1182
+ "grad_norm": 1.502350091934204,
1183
+ "learning_rate": 3.7801587301587306e-05,
1184
+ "loss": 0.0858,
1185
+ "step": 1640
1186
+ },
1187
+ {
1188
+ "epoch": 25.78125,
1189
+ "grad_norm": 1.4763522148132324,
1190
+ "learning_rate": 3.772222222222223e-05,
1191
+ "loss": 0.0647,
1192
+ "step": 1650
1193
+ },
1194
+ {
1195
+ "epoch": 25.9375,
1196
+ "grad_norm": 1.7555052042007446,
1197
+ "learning_rate": 3.764285714285715e-05,
1198
+ "loss": 0.0874,
1199
+ "step": 1660
1200
+ },
1201
+ {
1202
+ "epoch": 26.09375,
1203
+ "grad_norm": 1.0187015533447266,
1204
+ "learning_rate": 3.756349206349207e-05,
1205
+ "loss": 0.0635,
1206
+ "step": 1670
1207
+ },
1208
+ {
1209
+ "epoch": 26.25,
1210
+ "grad_norm": 1.7138936519622803,
1211
+ "learning_rate": 3.7484126984126985e-05,
1212
+ "loss": 0.0772,
1213
+ "step": 1680
1214
+ },
1215
+ {
1216
+ "epoch": 26.40625,
1217
+ "grad_norm": 2.3353724479675293,
1218
+ "learning_rate": 3.7404761904761907e-05,
1219
+ "loss": 0.0892,
1220
+ "step": 1690
1221
+ },
1222
+ {
1223
+ "epoch": 26.5625,
1224
+ "grad_norm": 2.6141700744628906,
1225
+ "learning_rate": 3.732539682539682e-05,
1226
+ "loss": 0.0897,
1227
+ "step": 1700
1228
+ },
1229
+ {
1230
+ "epoch": 26.71875,
1231
+ "grad_norm": 1.7785848379135132,
1232
+ "learning_rate": 3.724603174603174e-05,
1233
+ "loss": 0.0899,
1234
+ "step": 1710
1235
+ },
1236
+ {
1237
+ "epoch": 26.875,
1238
+ "grad_norm": 2.6693010330200195,
1239
+ "learning_rate": 3.7166666666666664e-05,
1240
+ "loss": 0.0901,
1241
+ "step": 1720
1242
+ },
1243
+ {
1244
+ "epoch": 27.03125,
1245
+ "grad_norm": 2.059981346130371,
1246
+ "learning_rate": 3.7087301587301586e-05,
1247
+ "loss": 0.0822,
1248
+ "step": 1730
1249
+ },
1250
+ {
1251
+ "epoch": 27.1875,
1252
+ "grad_norm": 1.6238901615142822,
1253
+ "learning_rate": 3.700793650793651e-05,
1254
+ "loss": 0.0777,
1255
+ "step": 1740
1256
+ },
1257
+ {
1258
+ "epoch": 27.34375,
1259
+ "grad_norm": 2.782425880432129,
1260
+ "learning_rate": 3.692857142857143e-05,
1261
+ "loss": 0.0782,
1262
+ "step": 1750
1263
+ },
1264
+ {
1265
+ "epoch": 27.5,
1266
+ "grad_norm": 1.8468166589736938,
1267
+ "learning_rate": 3.684920634920635e-05,
1268
+ "loss": 0.0835,
1269
+ "step": 1760
1270
+ },
1271
+ {
1272
+ "epoch": 27.65625,
1273
+ "grad_norm": 1.3156135082244873,
1274
+ "learning_rate": 3.676984126984127e-05,
1275
+ "loss": 0.1072,
1276
+ "step": 1770
1277
+ },
1278
+ {
1279
+ "epoch": 27.8125,
1280
+ "grad_norm": 3.260084390640259,
1281
+ "learning_rate": 3.669047619047619e-05,
1282
+ "loss": 0.0939,
1283
+ "step": 1780
1284
+ },
1285
+ {
1286
+ "epoch": 27.96875,
1287
+ "grad_norm": 2.518204689025879,
1288
+ "learning_rate": 3.6611111111111115e-05,
1289
+ "loss": 0.0884,
1290
+ "step": 1790
1291
+ },
1292
+ {
1293
+ "epoch": 28.125,
1294
+ "grad_norm": 2.598057985305786,
1295
+ "learning_rate": 3.6531746031746036e-05,
1296
+ "loss": 0.0683,
1297
+ "step": 1800
1298
+ },
1299
+ {
1300
+ "epoch": 28.28125,
1301
+ "grad_norm": 1.8533433675765991,
1302
+ "learning_rate": 3.645238095238096e-05,
1303
+ "loss": 0.0833,
1304
+ "step": 1810
1305
+ },
1306
+ {
1307
+ "epoch": 28.4375,
1308
+ "grad_norm": 1.2828975915908813,
1309
+ "learning_rate": 3.637301587301587e-05,
1310
+ "loss": 0.0832,
1311
+ "step": 1820
1312
+ },
1313
+ {
1314
+ "epoch": 28.59375,
1315
+ "grad_norm": 1.7714905738830566,
1316
+ "learning_rate": 3.6293650793650794e-05,
1317
+ "loss": 0.0901,
1318
+ "step": 1830
1319
+ },
1320
+ {
1321
+ "epoch": 28.75,
1322
+ "grad_norm": 2.098923921585083,
1323
+ "learning_rate": 3.6214285714285716e-05,
1324
+ "loss": 0.0954,
1325
+ "step": 1840
1326
+ },
1327
+ {
1328
+ "epoch": 28.90625,
1329
+ "grad_norm": 2.298226833343506,
1330
+ "learning_rate": 3.613492063492064e-05,
1331
+ "loss": 0.0888,
1332
+ "step": 1850
1333
+ },
1334
+ {
1335
+ "epoch": 29.0625,
1336
+ "grad_norm": 1.5519624948501587,
1337
+ "learning_rate": 3.605555555555556e-05,
1338
+ "loss": 0.0664,
1339
+ "step": 1860
1340
+ },
1341
+ {
1342
+ "epoch": 29.21875,
1343
+ "grad_norm": 2.015573501586914,
1344
+ "learning_rate": 3.597619047619048e-05,
1345
+ "loss": 0.0637,
1346
+ "step": 1870
1347
+ },
1348
+ {
1349
+ "epoch": 29.375,
1350
+ "grad_norm": 1.925529956817627,
1351
+ "learning_rate": 3.58968253968254e-05,
1352
+ "loss": 0.0803,
1353
+ "step": 1880
1354
+ },
1355
+ {
1356
+ "epoch": 29.53125,
1357
+ "grad_norm": 2.4342522621154785,
1358
+ "learning_rate": 3.581746031746032e-05,
1359
+ "loss": 0.0911,
1360
+ "step": 1890
1361
+ },
1362
+ {
1363
+ "epoch": 29.6875,
1364
+ "grad_norm": 1.8124195337295532,
1365
+ "learning_rate": 3.573809523809524e-05,
1366
+ "loss": 0.0803,
1367
+ "step": 1900
1368
+ },
1369
+ {
1370
+ "epoch": 29.84375,
1371
+ "grad_norm": 2.3409860134124756,
1372
+ "learning_rate": 3.565873015873016e-05,
1373
+ "loss": 0.0943,
1374
+ "step": 1910
1375
+ },
1376
+ {
1377
+ "epoch": 30.0,
1378
+ "grad_norm": 1.2545162439346313,
1379
+ "learning_rate": 3.557936507936508e-05,
1380
+ "loss": 0.0853,
1381
+ "step": 1920
1382
+ },
1383
+ {
1384
+ "epoch": 30.15625,
1385
+ "grad_norm": 1.9356091022491455,
1386
+ "learning_rate": 3.55e-05,
1387
+ "loss": 0.0585,
1388
+ "step": 1930
1389
+ },
1390
+ {
1391
+ "epoch": 30.3125,
1392
+ "grad_norm": 1.6294385194778442,
1393
+ "learning_rate": 3.5420634920634924e-05,
1394
+ "loss": 0.0876,
1395
+ "step": 1940
1396
+ },
1397
+ {
1398
+ "epoch": 30.46875,
1399
+ "grad_norm": 2.081688165664673,
1400
+ "learning_rate": 3.534126984126984e-05,
1401
+ "loss": 0.0827,
1402
+ "step": 1950
1403
+ },
1404
+ {
1405
+ "epoch": 30.625,
1406
+ "grad_norm": 1.7468382120132446,
1407
+ "learning_rate": 3.526190476190476e-05,
1408
+ "loss": 0.0732,
1409
+ "step": 1960
1410
+ },
1411
+ {
1412
+ "epoch": 30.78125,
1413
+ "grad_norm": 2.1742124557495117,
1414
+ "learning_rate": 3.518253968253968e-05,
1415
+ "loss": 0.084,
1416
+ "step": 1970
1417
+ },
1418
+ {
1419
+ "epoch": 30.9375,
1420
+ "grad_norm": 2.147754430770874,
1421
+ "learning_rate": 3.51031746031746e-05,
1422
+ "loss": 0.0856,
1423
+ "step": 1980
1424
+ },
1425
+ {
1426
+ "epoch": 31.09375,
1427
+ "grad_norm": 2.1251964569091797,
1428
+ "learning_rate": 3.5023809523809525e-05,
1429
+ "loss": 0.0762,
1430
+ "step": 1990
1431
+ },
1432
+ {
1433
+ "epoch": 31.25,
1434
+ "grad_norm": 2.6378941535949707,
1435
+ "learning_rate": 3.4944444444444446e-05,
1436
+ "loss": 0.071,
1437
+ "step": 2000
1438
+ },
1439
+ {
1440
+ "epoch": 31.25,
1441
+ "eval_runtime": 2.0743,
1442
+ "eval_samples_per_second": 0.482,
1443
+ "eval_steps_per_second": 0.482,
1444
+ "step": 2000
1445
+ },
1446
+ {
1447
+ "epoch": 31.40625,
1448
+ "grad_norm": 2.126807689666748,
1449
+ "learning_rate": 3.486507936507937e-05,
1450
+ "loss": 0.0785,
1451
+ "step": 2010
1452
+ },
1453
+ {
1454
+ "epoch": 31.5625,
1455
+ "grad_norm": 2.4585835933685303,
1456
+ "learning_rate": 3.478571428571429e-05,
1457
+ "loss": 0.1053,
1458
+ "step": 2020
1459
+ },
1460
+ {
1461
+ "epoch": 31.71875,
1462
+ "grad_norm": 1.9649542570114136,
1463
+ "learning_rate": 3.470634920634921e-05,
1464
+ "loss": 0.0734,
1465
+ "step": 2030
1466
+ },
1467
+ {
1468
+ "epoch": 31.875,
1469
+ "grad_norm": 2.3006460666656494,
1470
+ "learning_rate": 3.462698412698413e-05,
1471
+ "loss": 0.0858,
1472
+ "step": 2040
1473
+ },
1474
+ {
1475
+ "epoch": 32.03125,
1476
+ "grad_norm": 2.163447856903076,
1477
+ "learning_rate": 3.4547619047619054e-05,
1478
+ "loss": 0.0826,
1479
+ "step": 2050
1480
+ },
1481
+ {
1482
+ "epoch": 32.1875,
1483
+ "grad_norm": 2.0441508293151855,
1484
+ "learning_rate": 3.4468253968253975e-05,
1485
+ "loss": 0.0712,
1486
+ "step": 2060
1487
+ },
1488
+ {
1489
+ "epoch": 32.34375,
1490
+ "grad_norm": 1.5795445442199707,
1491
+ "learning_rate": 3.438888888888889e-05,
1492
+ "loss": 0.056,
1493
+ "step": 2070
1494
+ },
1495
+ {
1496
+ "epoch": 32.5,
1497
+ "grad_norm": 3.5661540031433105,
1498
+ "learning_rate": 3.430952380952381e-05,
1499
+ "loss": 0.069,
1500
+ "step": 2080
1501
+ },
1502
+ {
1503
+ "epoch": 32.65625,
1504
+ "grad_norm": 1.6884055137634277,
1505
+ "learning_rate": 3.423015873015873e-05,
1506
+ "loss": 0.0899,
1507
+ "step": 2090
1508
+ },
1509
+ {
1510
+ "epoch": 32.8125,
1511
+ "grad_norm": 2.421724557876587,
1512
+ "learning_rate": 3.415079365079365e-05,
1513
+ "loss": 0.0768,
1514
+ "step": 2100
1515
+ },
1516
+ {
1517
+ "epoch": 32.96875,
1518
+ "grad_norm": 1.1584899425506592,
1519
+ "learning_rate": 3.407142857142857e-05,
1520
+ "loss": 0.0763,
1521
+ "step": 2110
1522
+ },
1523
+ {
1524
+ "epoch": 33.125,
1525
+ "grad_norm": 2.069801092147827,
1526
+ "learning_rate": 3.399206349206349e-05,
1527
+ "loss": 0.0636,
1528
+ "step": 2120
1529
+ },
1530
+ {
1531
+ "epoch": 33.28125,
1532
+ "grad_norm": 2.5826573371887207,
1533
+ "learning_rate": 3.391269841269841e-05,
1534
+ "loss": 0.0676,
1535
+ "step": 2130
1536
+ },
1537
+ {
1538
+ "epoch": 33.4375,
1539
+ "grad_norm": 1.119449496269226,
1540
+ "learning_rate": 3.3833333333333334e-05,
1541
+ "loss": 0.0613,
1542
+ "step": 2140
1543
+ },
1544
+ {
1545
+ "epoch": 33.59375,
1546
+ "grad_norm": 1.6656103134155273,
1547
+ "learning_rate": 3.3753968253968255e-05,
1548
+ "loss": 0.0598,
1549
+ "step": 2150
1550
+ },
1551
+ {
1552
+ "epoch": 33.75,
1553
+ "grad_norm": 1.7956265211105347,
1554
+ "learning_rate": 3.3674603174603177e-05,
1555
+ "loss": 0.0778,
1556
+ "step": 2160
1557
+ },
1558
+ {
1559
+ "epoch": 33.90625,
1560
+ "grad_norm": 2.2106685638427734,
1561
+ "learning_rate": 3.35952380952381e-05,
1562
+ "loss": 0.0737,
1563
+ "step": 2170
1564
+ },
1565
+ {
1566
+ "epoch": 34.0625,
1567
+ "grad_norm": 2.4478724002838135,
1568
+ "learning_rate": 3.351587301587302e-05,
1569
+ "loss": 0.0642,
1570
+ "step": 2180
1571
+ },
1572
+ {
1573
+ "epoch": 34.21875,
1574
+ "grad_norm": 2.3334341049194336,
1575
+ "learning_rate": 3.343650793650794e-05,
1576
+ "loss": 0.0697,
1577
+ "step": 2190
1578
+ },
1579
+ {
1580
+ "epoch": 34.375,
1581
+ "grad_norm": 1.870275855064392,
1582
+ "learning_rate": 3.3357142857142856e-05,
1583
+ "loss": 0.0523,
1584
+ "step": 2200
1585
+ },
1586
+ {
1587
+ "epoch": 34.53125,
1588
+ "grad_norm": 1.283444881439209,
1589
+ "learning_rate": 3.327777777777778e-05,
1590
+ "loss": 0.0619,
1591
+ "step": 2210
1592
+ },
1593
+ {
1594
+ "epoch": 34.6875,
1595
+ "grad_norm": 1.7918671369552612,
1596
+ "learning_rate": 3.31984126984127e-05,
1597
+ "loss": 0.061,
1598
+ "step": 2220
1599
+ },
1600
+ {
1601
+ "epoch": 34.84375,
1602
+ "grad_norm": 1.6546680927276611,
1603
+ "learning_rate": 3.311904761904762e-05,
1604
+ "loss": 0.058,
1605
+ "step": 2230
1606
+ },
1607
+ {
1608
+ "epoch": 35.0,
1609
+ "grad_norm": 3.8063647747039795,
1610
+ "learning_rate": 3.303968253968254e-05,
1611
+ "loss": 0.0605,
1612
+ "step": 2240
1613
+ },
1614
+ {
1615
+ "epoch": 35.15625,
1616
+ "grad_norm": 1.8701483011245728,
1617
+ "learning_rate": 3.296031746031746e-05,
1618
+ "loss": 0.0652,
1619
+ "step": 2250
1620
+ },
1621
+ {
1622
+ "epoch": 35.3125,
1623
+ "grad_norm": 0.48190346360206604,
1624
+ "learning_rate": 3.2880952380952385e-05,
1625
+ "loss": 0.0443,
1626
+ "step": 2260
1627
+ },
1628
+ {
1629
+ "epoch": 35.46875,
1630
+ "grad_norm": 1.0227997303009033,
1631
+ "learning_rate": 3.2801587301587306e-05,
1632
+ "loss": 0.0397,
1633
+ "step": 2270
1634
+ },
1635
+ {
1636
+ "epoch": 35.625,
1637
+ "grad_norm": 1.4256937503814697,
1638
+ "learning_rate": 3.272222222222223e-05,
1639
+ "loss": 0.0541,
1640
+ "step": 2280
1641
+ },
1642
+ {
1643
+ "epoch": 35.78125,
1644
+ "grad_norm": 2.3451199531555176,
1645
+ "learning_rate": 3.264285714285714e-05,
1646
+ "loss": 0.0568,
1647
+ "step": 2290
1648
+ },
1649
+ {
1650
+ "epoch": 35.9375,
1651
+ "grad_norm": 1.3683526515960693,
1652
+ "learning_rate": 3.2563492063492064e-05,
1653
+ "loss": 0.0587,
1654
+ "step": 2300
1655
+ },
1656
+ {
1657
+ "epoch": 36.09375,
1658
+ "grad_norm": 1.4543867111206055,
1659
+ "learning_rate": 3.2484126984126986e-05,
1660
+ "loss": 0.0394,
1661
+ "step": 2310
1662
+ },
1663
+ {
1664
+ "epoch": 36.25,
1665
+ "grad_norm": 1.9606877565383911,
1666
+ "learning_rate": 3.240476190476191e-05,
1667
+ "loss": 0.0545,
1668
+ "step": 2320
1669
+ },
1670
+ {
1671
+ "epoch": 36.40625,
1672
+ "grad_norm": 1.6910959482192993,
1673
+ "learning_rate": 3.232539682539683e-05,
1674
+ "loss": 0.0638,
1675
+ "step": 2330
1676
+ },
1677
+ {
1678
+ "epoch": 36.5625,
1679
+ "grad_norm": 1.735841155052185,
1680
+ "learning_rate": 3.224603174603174e-05,
1681
+ "loss": 0.0569,
1682
+ "step": 2340
1683
+ },
1684
+ {
1685
+ "epoch": 36.71875,
1686
+ "grad_norm": 1.5598944425582886,
1687
+ "learning_rate": 3.2166666666666665e-05,
1688
+ "loss": 0.0649,
1689
+ "step": 2350
1690
+ },
1691
+ {
1692
+ "epoch": 36.875,
1693
+ "grad_norm": 1.5700335502624512,
1694
+ "learning_rate": 3.2087301587301586e-05,
1695
+ "loss": 0.0689,
1696
+ "step": 2360
1697
+ },
1698
+ {
1699
+ "epoch": 37.03125,
1700
+ "grad_norm": 1.0767812728881836,
1701
+ "learning_rate": 3.200793650793651e-05,
1702
+ "loss": 0.0535,
1703
+ "step": 2370
1704
+ },
1705
+ {
1706
+ "epoch": 37.1875,
1707
+ "grad_norm": 2.505707263946533,
1708
+ "learning_rate": 3.192857142857143e-05,
1709
+ "loss": 0.0486,
1710
+ "step": 2380
1711
+ },
1712
+ {
1713
+ "epoch": 37.34375,
1714
+ "grad_norm": 2.1414477825164795,
1715
+ "learning_rate": 3.184920634920635e-05,
1716
+ "loss": 0.0462,
1717
+ "step": 2390
1718
+ },
1719
+ {
1720
+ "epoch": 37.5,
1721
+ "grad_norm": 1.0493581295013428,
1722
+ "learning_rate": 3.176984126984127e-05,
1723
+ "loss": 0.0515,
1724
+ "step": 2400
1725
+ },
1726
+ {
1727
+ "epoch": 37.65625,
1728
+ "grad_norm": 1.5255446434020996,
1729
+ "learning_rate": 3.1690476190476194e-05,
1730
+ "loss": 0.0485,
1731
+ "step": 2410
1732
+ },
1733
+ {
1734
+ "epoch": 37.8125,
1735
+ "grad_norm": 1.3334754705429077,
1736
+ "learning_rate": 3.1611111111111115e-05,
1737
+ "loss": 0.056,
1738
+ "step": 2420
1739
+ },
1740
+ {
1741
+ "epoch": 37.96875,
1742
+ "grad_norm": 1.745110273361206,
1743
+ "learning_rate": 3.153174603174604e-05,
1744
+ "loss": 0.0473,
1745
+ "step": 2430
1746
+ },
1747
+ {
1748
+ "epoch": 38.125,
1749
+ "grad_norm": 1.4413131475448608,
1750
+ "learning_rate": 3.145238095238096e-05,
1751
+ "loss": 0.0317,
1752
+ "step": 2440
1753
+ },
1754
+ {
1755
+ "epoch": 38.28125,
1756
+ "grad_norm": 1.6811962127685547,
1757
+ "learning_rate": 3.137301587301587e-05,
1758
+ "loss": 0.0496,
1759
+ "step": 2450
1760
+ },
1761
+ {
1762
+ "epoch": 38.4375,
1763
+ "grad_norm": 1.2820957899093628,
1764
+ "learning_rate": 3.1293650793650795e-05,
1765
+ "loss": 0.0369,
1766
+ "step": 2460
1767
+ },
1768
+ {
1769
+ "epoch": 38.59375,
1770
+ "grad_norm": 1.7850005626678467,
1771
+ "learning_rate": 3.1214285714285716e-05,
1772
+ "loss": 0.0478,
1773
+ "step": 2470
1774
+ },
1775
+ {
1776
+ "epoch": 38.75,
1777
+ "grad_norm": 2.456017255783081,
1778
+ "learning_rate": 3.113492063492064e-05,
1779
+ "loss": 0.0507,
1780
+ "step": 2480
1781
+ },
1782
+ {
1783
+ "epoch": 38.90625,
1784
+ "grad_norm": 2.1933865547180176,
1785
+ "learning_rate": 3.105555555555555e-05,
1786
+ "loss": 0.0436,
1787
+ "step": 2490
1788
+ },
1789
+ {
1790
+ "epoch": 39.0625,
1791
+ "grad_norm": 0.9716876745223999,
1792
+ "learning_rate": 3.0976190476190474e-05,
1793
+ "loss": 0.0439,
1794
+ "step": 2500
1795
+ },
1796
+ {
1797
+ "epoch": 39.0625,
1798
+ "eval_runtime": 2.0072,
1799
+ "eval_samples_per_second": 0.498,
1800
+ "eval_steps_per_second": 0.498,
1801
+ "step": 2500
1802
+ },
1803
+ {
1804
+ "epoch": 39.21875,
1805
+ "grad_norm": 2.0628113746643066,
1806
+ "learning_rate": 3.0896825396825395e-05,
1807
+ "loss": 0.0408,
1808
+ "step": 2510
1809
+ },
1810
+ {
1811
+ "epoch": 39.375,
1812
+ "grad_norm": 1.5137745141983032,
1813
+ "learning_rate": 3.081746031746032e-05,
1814
+ "loss": 0.0445,
1815
+ "step": 2520
1816
+ },
1817
+ {
1818
+ "epoch": 39.53125,
1819
+ "grad_norm": 1.6775884628295898,
1820
+ "learning_rate": 3.073809523809524e-05,
1821
+ "loss": 0.051,
1822
+ "step": 2530
1823
+ },
1824
+ {
1825
+ "epoch": 39.6875,
1826
+ "grad_norm": 1.440619707107544,
1827
+ "learning_rate": 3.065873015873016e-05,
1828
+ "loss": 0.0387,
1829
+ "step": 2540
1830
+ },
1831
+ {
1832
+ "epoch": 39.84375,
1833
+ "grad_norm": 2.1149260997772217,
1834
+ "learning_rate": 3.057936507936508e-05,
1835
+ "loss": 0.0387,
1836
+ "step": 2550
1837
+ },
1838
+ {
1839
+ "epoch": 40.0,
1840
+ "grad_norm": 2.163191318511963,
1841
+ "learning_rate": 3.05e-05,
1842
+ "loss": 0.0363,
1843
+ "step": 2560
1844
+ },
1845
+ {
1846
+ "epoch": 40.15625,
1847
+ "grad_norm": 3.246634006500244,
1848
+ "learning_rate": 3.042063492063492e-05,
1849
+ "loss": 0.0429,
1850
+ "step": 2570
1851
+ },
1852
+ {
1853
+ "epoch": 40.3125,
1854
+ "grad_norm": 1.188644289970398,
1855
+ "learning_rate": 3.0341269841269842e-05,
1856
+ "loss": 0.0316,
1857
+ "step": 2580
1858
+ },
1859
+ {
1860
+ "epoch": 40.46875,
1861
+ "grad_norm": 2.2617383003234863,
1862
+ "learning_rate": 3.0261904761904764e-05,
1863
+ "loss": 0.0406,
1864
+ "step": 2590
1865
+ },
1866
+ {
1867
+ "epoch": 40.625,
1868
+ "grad_norm": 0.6037064790725708,
1869
+ "learning_rate": 3.0182539682539685e-05,
1870
+ "loss": 0.031,
1871
+ "step": 2600
1872
+ },
1873
+ {
1874
+ "epoch": 40.78125,
1875
+ "grad_norm": 1.1936763525009155,
1876
+ "learning_rate": 3.0103174603174607e-05,
1877
+ "loss": 0.0401,
1878
+ "step": 2610
1879
+ },
1880
+ {
1881
+ "epoch": 40.9375,
1882
+ "grad_norm": 3.0310215950012207,
1883
+ "learning_rate": 3.0023809523809525e-05,
1884
+ "loss": 0.0339,
1885
+ "step": 2620
1886
+ },
1887
+ {
1888
+ "epoch": 41.09375,
1889
+ "grad_norm": 0.6716585755348206,
1890
+ "learning_rate": 2.9944444444444446e-05,
1891
+ "loss": 0.0338,
1892
+ "step": 2630
1893
+ },
1894
+ {
1895
+ "epoch": 41.25,
1896
+ "grad_norm": 0.5010519623756409,
1897
+ "learning_rate": 2.9865079365079368e-05,
1898
+ "loss": 0.0315,
1899
+ "step": 2640
1900
+ }
1901
+ ],
1902
+ "logging_steps": 10,
1903
+ "max_steps": 6400,
1904
+ "num_input_tokens_seen": 0,
1905
+ "num_train_epochs": 100,
1906
+ "save_steps": 115,
1907
+ "stateful_callbacks": {
1908
+ "TrainerControl": {
1909
+ "args": {
1910
+ "should_epoch_stop": false,
1911
+ "should_evaluate": false,
1912
+ "should_log": false,
1913
+ "should_save": true,
1914
+ "should_training_stop": false
1915
+ },
1916
+ "attributes": {}
1917
+ }
1918
+ },
1919
+ "total_flos": 0.0,
1920
+ "train_batch_size": 8,
1921
+ "trial_name": null,
1922
+ "trial_params": null
1923
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ffe8194a454fde0f449eb7c7e43672ffa6b49ff9ee6aba76958f32161079d0b
3
+ size 5713
ve.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0921cab452fa278bc25cd23ffd59d36f816d7dc5181dd1bef9751a7fb61f63c
3
+ size 5695784