EO-1-3B / trainer_state.json
delinqu's picture
Upload folder using huggingface_hub
b7b76ba verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.074558032282859,
"eval_steps": 500,
"global_step": 60000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"ar_loss": 0.7918,
"epoch": 0.015039855617386072,
"fm_loss": 2.2779,
"grad_norm": 4.37157678604126,
"learning_rate": 4.9999971366617235e-05,
"loss": 3.0697,
"step": 100
},
{
"ar_loss": 0.6852,
"epoch": 0.030079711234772145,
"fm_loss": 0.5703,
"grad_norm": 4.328033447265625,
"learning_rate": 4.999951278658594e-05,
"loss": 1.2555,
"step": 200
},
{
"ar_loss": 0.669,
"epoch": 0.04511956685215822,
"fm_loss": 0.3866,
"grad_norm": 3.0744214057922363,
"learning_rate": 4.9998494972632174e-05,
"loss": 1.0556,
"step": 300
},
{
"ar_loss": 0.6594,
"epoch": 0.06015942246954429,
"fm_loss": 0.319,
"grad_norm": 2.9869937896728516,
"learning_rate": 4.9996917947524234e-05,
"loss": 0.9783,
"step": 400
},
{
"ar_loss": 0.6539,
"epoch": 0.07519927808693036,
"fm_loss": 0.2901,
"grad_norm": 2.676532745361328,
"learning_rate": 4.999478174653984e-05,
"loss": 0.9441,
"step": 500
},
{
"ar_loss": 0.6451,
"epoch": 0.09023913370431644,
"fm_loss": 0.271,
"grad_norm": 2.8496103286743164,
"learning_rate": 4.999208641746537e-05,
"loss": 0.9161,
"step": 600
},
{
"ar_loss": 0.639,
"epoch": 0.10527898932170252,
"fm_loss": 0.262,
"grad_norm": 3.5984983444213867,
"learning_rate": 4.998883202059478e-05,
"loss": 0.901,
"step": 700
},
{
"ar_loss": 0.6368,
"epoch": 0.12031884493908858,
"fm_loss": 0.251,
"grad_norm": 3.3547682762145996,
"learning_rate": 4.998501862872824e-05,
"loss": 0.8878,
"step": 800
},
{
"ar_loss": 0.6325,
"epoch": 0.13535870055647467,
"fm_loss": 0.2422,
"grad_norm": 3.7177250385284424,
"learning_rate": 4.998064632717054e-05,
"loss": 0.8747,
"step": 900
},
{
"ar_loss": 0.6295,
"epoch": 0.15039855617386072,
"fm_loss": 0.2364,
"grad_norm": 2.8715012073516846,
"learning_rate": 4.997571521372918e-05,
"loss": 0.8659,
"step": 1000
},
{
"ar_loss": 0.6261,
"epoch": 0.1654384117912468,
"fm_loss": 0.233,
"grad_norm": 2.686033248901367,
"learning_rate": 4.9970225398712116e-05,
"loss": 0.8591,
"step": 1100
},
{
"ar_loss": 0.6227,
"epoch": 0.18047826740863288,
"fm_loss": 0.2337,
"grad_norm": 2.735515594482422,
"learning_rate": 4.99641770049254e-05,
"loss": 0.8564,
"step": 1200
},
{
"ar_loss": 0.6288,
"epoch": 0.19551812302601895,
"fm_loss": 0.2343,
"grad_norm": 3.192143440246582,
"learning_rate": 4.995757016767032e-05,
"loss": 0.8631,
"step": 1300
},
{
"ar_loss": 0.6179,
"epoch": 0.21055797864340503,
"fm_loss": 0.2254,
"grad_norm": 4.529001235961914,
"learning_rate": 4.995040503474049e-05,
"loss": 0.8433,
"step": 1400
},
{
"ar_loss": 0.6117,
"epoch": 0.2255978342607911,
"fm_loss": 0.2265,
"grad_norm": 2.0048255920410156,
"learning_rate": 4.994268176641842e-05,
"loss": 0.8382,
"step": 1500
},
{
"ar_loss": 0.6197,
"epoch": 0.24063768987817716,
"fm_loss": 0.2174,
"grad_norm": 2.0079429149627686,
"learning_rate": 4.993440053547204e-05,
"loss": 0.8372,
"step": 1600
},
{
"ar_loss": 0.6073,
"epoch": 0.25567754549556326,
"fm_loss": 0.2158,
"grad_norm": 2.0703108310699463,
"learning_rate": 4.992556152715076e-05,
"loss": 0.8231,
"step": 1700
},
{
"ar_loss": 0.6091,
"epoch": 0.27071740111294934,
"fm_loss": 0.2121,
"grad_norm": 2.3248846530914307,
"learning_rate": 4.991616493918137e-05,
"loss": 0.8213,
"step": 1800
},
{
"ar_loss": 0.608,
"epoch": 0.28575725673033536,
"fm_loss": 0.2088,
"grad_norm": 2.5417592525482178,
"learning_rate": 4.99062109817636e-05,
"loss": 0.8167,
"step": 1900
},
{
"ar_loss": 0.6097,
"epoch": 0.30079711234772144,
"fm_loss": 0.21,
"grad_norm": 2.219682216644287,
"learning_rate": 4.989569987756542e-05,
"loss": 0.8196,
"step": 2000
},
{
"ar_loss": 0.6056,
"epoch": 0.3158369679651075,
"fm_loss": 0.2079,
"grad_norm": 1.9187816381454468,
"learning_rate": 4.988463186171804e-05,
"loss": 0.8136,
"step": 2100
},
{
"ar_loss": 0.6064,
"epoch": 0.3308768235824936,
"fm_loss": 0.2072,
"grad_norm": 2.246711492538452,
"learning_rate": 4.987300718181068e-05,
"loss": 0.8136,
"step": 2200
},
{
"ar_loss": 0.6015,
"epoch": 0.3459166791998797,
"fm_loss": 0.2044,
"grad_norm": 1.855542540550232,
"learning_rate": 4.986082609788504e-05,
"loss": 0.8059,
"step": 2300
},
{
"ar_loss": 0.6,
"epoch": 0.36095653481726575,
"fm_loss": 0.2081,
"grad_norm": 1.883419156074524,
"learning_rate": 4.9848088882429426e-05,
"loss": 0.8082,
"step": 2400
},
{
"ar_loss": 0.6048,
"epoch": 0.37599639043465183,
"fm_loss": 0.201,
"grad_norm": 4.141597747802734,
"learning_rate": 4.983479582037272e-05,
"loss": 0.8058,
"step": 2500
},
{
"ar_loss": 0.6025,
"epoch": 0.3910362460520379,
"fm_loss": 0.2012,
"grad_norm": 1.3507440090179443,
"learning_rate": 4.9820947209077965e-05,
"loss": 0.8037,
"step": 2600
},
{
"ar_loss": 0.5966,
"epoch": 0.406076101669424,
"fm_loss": 0.2103,
"grad_norm": 1.5875388383865356,
"learning_rate": 4.980654335833572e-05,
"loss": 0.8069,
"step": 2700
},
{
"ar_loss": 0.5995,
"epoch": 0.42111595728681006,
"fm_loss": 0.1975,
"grad_norm": 2.4850363731384277,
"learning_rate": 4.979158459035715e-05,
"loss": 0.7971,
"step": 2800
},
{
"ar_loss": 0.595,
"epoch": 0.43615581290419614,
"fm_loss": 0.1981,
"grad_norm": 2.0931544303894043,
"learning_rate": 4.97760712397668e-05,
"loss": 0.7931,
"step": 2900
},
{
"ar_loss": 0.5959,
"epoch": 0.4511956685215822,
"fm_loss": 0.1984,
"grad_norm": 2.2044456005096436,
"learning_rate": 4.97600036535951e-05,
"loss": 0.7943,
"step": 3000
},
{
"ar_loss": 0.5894,
"epoch": 0.4662355241389683,
"fm_loss": 0.197,
"grad_norm": 2.905266046524048,
"learning_rate": 4.974338219127062e-05,
"loss": 0.7863,
"step": 3100
},
{
"ar_loss": 0.5851,
"epoch": 0.4812753797563543,
"fm_loss": 0.1953,
"grad_norm": 2.5252127647399902,
"learning_rate": 4.9726207224612034e-05,
"loss": 0.7805,
"step": 3200
},
{
"ar_loss": 0.5936,
"epoch": 0.4963152353737404,
"fm_loss": 0.1957,
"grad_norm": 2.7935545444488525,
"learning_rate": 4.97084791378198e-05,
"loss": 0.7893,
"step": 3300
},
{
"ar_loss": 0.5869,
"epoch": 0.5113550909911265,
"fm_loss": 0.1933,
"grad_norm": 1.2401388883590698,
"learning_rate": 4.9690198327467534e-05,
"loss": 0.7802,
"step": 3400
},
{
"ar_loss": 0.5888,
"epoch": 0.5263949466085126,
"fm_loss": 0.1944,
"grad_norm": 3.7216362953186035,
"learning_rate": 4.967136520249318e-05,
"loss": 0.7832,
"step": 3500
},
{
"ar_loss": 0.587,
"epoch": 0.5414348022258987,
"fm_loss": 0.1916,
"grad_norm": 1.946953296661377,
"learning_rate": 4.965198018418985e-05,
"loss": 0.7786,
"step": 3600
},
{
"ar_loss": 0.5828,
"epoch": 0.5564746578432848,
"fm_loss": 0.1921,
"grad_norm": 1.695999264717102,
"learning_rate": 4.963204370619637e-05,
"loss": 0.7749,
"step": 3700
},
{
"ar_loss": 0.5832,
"epoch": 0.5715145134606707,
"fm_loss": 0.1973,
"grad_norm": 2.257406234741211,
"learning_rate": 4.9611556214487645e-05,
"loss": 0.7805,
"step": 3800
},
{
"ar_loss": 0.5825,
"epoch": 0.5865543690780568,
"fm_loss": 0.1915,
"grad_norm": 2.4373984336853027,
"learning_rate": 4.95905181673646e-05,
"loss": 0.7739,
"step": 3900
},
{
"ar_loss": 0.5809,
"epoch": 0.6015942246954429,
"fm_loss": 0.1924,
"grad_norm": 1.4836270809173584,
"learning_rate": 4.956893003544401e-05,
"loss": 0.7733,
"step": 4000
},
{
"ar_loss": 0.5801,
"epoch": 0.616634080312829,
"fm_loss": 0.1956,
"grad_norm": 1.864709734916687,
"learning_rate": 4.954679230164789e-05,
"loss": 0.7757,
"step": 4100
},
{
"ar_loss": 0.5824,
"epoch": 0.631673935930215,
"fm_loss": 0.1895,
"grad_norm": 1.5477041006088257,
"learning_rate": 4.952410546119278e-05,
"loss": 0.7719,
"step": 4200
},
{
"ar_loss": 0.5819,
"epoch": 0.6467137915476011,
"fm_loss": 0.1925,
"grad_norm": 1.6031211614608765,
"learning_rate": 4.95008700215786e-05,
"loss": 0.7744,
"step": 4300
},
{
"ar_loss": 0.5778,
"epoch": 0.6617536471649872,
"fm_loss": 0.2067,
"grad_norm": 5.476424694061279,
"learning_rate": 4.947708650257732e-05,
"loss": 0.7845,
"step": 4400
},
{
"ar_loss": 0.5767,
"epoch": 0.6767935027823733,
"fm_loss": 0.1875,
"grad_norm": 1.901498794555664,
"learning_rate": 4.945275543622133e-05,
"loss": 0.7642,
"step": 4500
},
{
"ar_loss": 0.5766,
"epoch": 0.6918333583997593,
"fm_loss": 0.1912,
"grad_norm": 1.4781795740127563,
"learning_rate": 4.942787736679153e-05,
"loss": 0.7678,
"step": 4600
},
{
"ar_loss": 0.5735,
"epoch": 0.7068732140171454,
"fm_loss": 0.1893,
"grad_norm": 2.639249801635742,
"learning_rate": 4.940245285080521e-05,
"loss": 0.7628,
"step": 4700
},
{
"ar_loss": 0.5736,
"epoch": 0.7219130696345315,
"fm_loss": 0.1854,
"grad_norm": 1.5374678373336792,
"learning_rate": 4.93764824570035e-05,
"loss": 0.759,
"step": 4800
},
{
"ar_loss": 0.5755,
"epoch": 0.7369529252519176,
"fm_loss": 0.187,
"grad_norm": 1.971647024154663,
"learning_rate": 4.934996676633874e-05,
"loss": 0.7625,
"step": 4900
},
{
"ar_loss": 0.5725,
"epoch": 0.7519927808693037,
"fm_loss": 0.1875,
"grad_norm": 5.200182914733887,
"learning_rate": 4.932290637196144e-05,
"loss": 0.76,
"step": 5000
},
{
"ar_loss": 0.5504,
"epoch": 0.015037593984962405,
"fm_loss": 0.1893,
"grad_norm": 2.8439042568206787,
"learning_rate": 4.929551301429187e-05,
"loss": 0.7396,
"step": 5100
},
{
"ar_loss": 0.5503,
"epoch": 0.03007518796992481,
"fm_loss": 0.1831,
"grad_norm": 1.733323335647583,
"learning_rate": 4.926737343223295e-05,
"loss": 0.7334,
"step": 5200
},
{
"ar_loss": 0.5514,
"epoch": 0.045112781954887216,
"fm_loss": 0.185,
"grad_norm": 2.580357074737549,
"learning_rate": 4.9238691157474316e-05,
"loss": 0.7364,
"step": 5300
},
{
"ar_loss": 0.5493,
"epoch": 0.06015037593984962,
"fm_loss": 0.1835,
"grad_norm": 1.9058268070220947,
"learning_rate": 4.920946683143935e-05,
"loss": 0.7327,
"step": 5400
},
{
"ar_loss": 0.5497,
"epoch": 0.07518796992481203,
"fm_loss": 0.184,
"grad_norm": 1.3233968019485474,
"learning_rate": 4.91797011076734e-05,
"loss": 0.7337,
"step": 5500
},
{
"ar_loss": 0.5466,
"epoch": 0.09022556390977443,
"fm_loss": 0.1829,
"grad_norm": 1.8950995206832886,
"learning_rate": 4.9149394651829086e-05,
"loss": 0.7295,
"step": 5600
},
{
"ar_loss": 0.5495,
"epoch": 0.10526315789473684,
"fm_loss": 0.1829,
"grad_norm": 1.7234010696411133,
"learning_rate": 4.911854814165145e-05,
"loss": 0.7324,
"step": 5700
},
{
"ar_loss": 0.5475,
"epoch": 0.12030075187969924,
"fm_loss": 0.1821,
"grad_norm": 2.068098783493042,
"learning_rate": 4.908716226696284e-05,
"loss": 0.7295,
"step": 5800
},
{
"ar_loss": 0.5457,
"epoch": 0.13533834586466165,
"fm_loss": 0.1837,
"grad_norm": 1.4124906063079834,
"learning_rate": 4.905523772964739e-05,
"loss": 0.7294,
"step": 5900
},
{
"ar_loss": 0.5504,
"epoch": 0.15037593984962405,
"fm_loss": 0.1828,
"grad_norm": 1.7072114944458008,
"learning_rate": 4.902277524363543e-05,
"loss": 0.7332,
"step": 6000
},
{
"ar_loss": 0.5454,
"epoch": 0.16541353383458646,
"fm_loss": 0.181,
"grad_norm": 1.737274169921875,
"learning_rate": 4.898977553488743e-05,
"loss": 0.7264,
"step": 6100
},
{
"ar_loss": 0.5501,
"epoch": 0.18045112781954886,
"fm_loss": 0.1808,
"grad_norm": 2.1727452278137207,
"learning_rate": 4.895623934137783e-05,
"loss": 0.7309,
"step": 6200
},
{
"ar_loss": 0.5439,
"epoch": 0.19548872180451127,
"fm_loss": 0.1798,
"grad_norm": 2.8658437728881836,
"learning_rate": 4.892216741307848e-05,
"loss": 0.7237,
"step": 6300
},
{
"ar_loss": 0.5438,
"epoch": 0.21052631578947367,
"fm_loss": 0.1812,
"grad_norm": 1.8495811223983765,
"learning_rate": 4.888756051194193e-05,
"loss": 0.725,
"step": 6400
},
{
"ar_loss": 0.5391,
"epoch": 0.22556390977443608,
"fm_loss": 0.1894,
"grad_norm": 2.1281821727752686,
"learning_rate": 4.885241941188435e-05,
"loss": 0.7286,
"step": 6500
},
{
"ar_loss": 0.5437,
"epoch": 0.24060150375939848,
"fm_loss": 0.1787,
"grad_norm": 1.7707561254501343,
"learning_rate": 4.881674489876822e-05,
"loss": 0.7224,
"step": 6600
},
{
"ar_loss": 0.5413,
"epoch": 0.2556390977443609,
"fm_loss": 0.1795,
"grad_norm": 1.4524445533752441,
"learning_rate": 4.878053777038478e-05,
"loss": 0.7208,
"step": 6700
},
{
"ar_loss": 0.5411,
"epoch": 0.2706766917293233,
"fm_loss": 0.18,
"grad_norm": 2.2196991443634033,
"learning_rate": 4.8743798836436166e-05,
"loss": 0.7212,
"step": 6800
},
{
"ar_loss": 0.5395,
"epoch": 0.2857142857142857,
"fm_loss": 0.1778,
"grad_norm": 3.7135586738586426,
"learning_rate": 4.8706528918517326e-05,
"loss": 0.7173,
"step": 6900
},
{
"ar_loss": 0.5397,
"epoch": 0.3007518796992481,
"fm_loss": 0.179,
"grad_norm": 2.551405191421509,
"learning_rate": 4.866872885009762e-05,
"loss": 0.7188,
"step": 7000
},
{
"ar_loss": 0.5433,
"epoch": 0.3157894736842105,
"fm_loss": 0.1769,
"grad_norm": 1.5937398672103882,
"learning_rate": 4.863039947650221e-05,
"loss": 0.7202,
"step": 7100
},
{
"ar_loss": 0.537,
"epoch": 0.3308270676691729,
"fm_loss": 0.1778,
"grad_norm": 2.1410443782806396,
"learning_rate": 4.859154165489313e-05,
"loss": 0.7148,
"step": 7200
},
{
"ar_loss": 0.5345,
"epoch": 0.3458646616541353,
"fm_loss": 0.1786,
"grad_norm": 1.9549566507339478,
"learning_rate": 4.855215625425012e-05,
"loss": 0.7131,
"step": 7300
},
{
"ar_loss": 0.5385,
"epoch": 0.3609022556390977,
"fm_loss": 0.1786,
"grad_norm": 1.6920804977416992,
"learning_rate": 4.851224415535123e-05,
"loss": 0.7171,
"step": 7400
},
{
"ar_loss": 0.5403,
"epoch": 0.37593984962406013,
"fm_loss": 0.1778,
"grad_norm": 1.460734248161316,
"learning_rate": 4.847180625075306e-05,
"loss": 0.7182,
"step": 7500
},
{
"ar_loss": 0.5333,
"epoch": 0.39097744360902253,
"fm_loss": 0.1802,
"grad_norm": 2.369119644165039,
"learning_rate": 4.8430843444770856e-05,
"loss": 0.7135,
"step": 7600
},
{
"ar_loss": 0.5343,
"epoch": 0.40601503759398494,
"fm_loss": 0.1777,
"grad_norm": 2.0863733291625977,
"learning_rate": 4.838935665345826e-05,
"loss": 0.712,
"step": 7700
},
{
"ar_loss": 0.5361,
"epoch": 0.42105263157894735,
"fm_loss": 0.1826,
"grad_norm": 2.1648268699645996,
"learning_rate": 4.834734680458682e-05,
"loss": 0.7187,
"step": 7800
},
{
"ar_loss": 0.5362,
"epoch": 0.43609022556390975,
"fm_loss": 0.1759,
"grad_norm": 3.731491804122925,
"learning_rate": 4.8304814837625275e-05,
"loss": 0.7121,
"step": 7900
},
{
"ar_loss": 0.5361,
"epoch": 0.45112781954887216,
"fm_loss": 0.1763,
"grad_norm": 3.129291296005249,
"learning_rate": 4.826176170371848e-05,
"loss": 0.7124,
"step": 8000
},
{
"ar_loss": 0.5308,
"epoch": 0.46616541353383456,
"fm_loss": 0.1803,
"grad_norm": 1.837388038635254,
"learning_rate": 4.8218188365666216e-05,
"loss": 0.7111,
"step": 8100
},
{
"ar_loss": 0.5384,
"epoch": 0.48120300751879697,
"fm_loss": 0.1765,
"grad_norm": 1.696321964263916,
"learning_rate": 4.817409579790161e-05,
"loss": 0.7148,
"step": 8200
},
{
"ar_loss": 0.5351,
"epoch": 0.49624060150375937,
"fm_loss": 0.177,
"grad_norm": 2.0662057399749756,
"learning_rate": 4.812948498646933e-05,
"loss": 0.7121,
"step": 8300
},
{
"ar_loss": 0.5358,
"epoch": 0.5112781954887218,
"fm_loss": 0.1757,
"grad_norm": 1.4204461574554443,
"learning_rate": 4.80843569290036e-05,
"loss": 0.7114,
"step": 8400
},
{
"ar_loss": 0.5316,
"epoch": 0.5263157894736842,
"fm_loss": 0.176,
"grad_norm": 3.6314117908477783,
"learning_rate": 4.80387126347058e-05,
"loss": 0.7075,
"step": 8500
},
{
"ar_loss": 0.5307,
"epoch": 0.5413533834586466,
"fm_loss": 0.1754,
"grad_norm": 2.164659023284912,
"learning_rate": 4.799255312432199e-05,
"loss": 0.706,
"step": 8600
},
{
"ar_loss": 0.5331,
"epoch": 0.556390977443609,
"fm_loss": 0.184,
"grad_norm": 1.9370899200439453,
"learning_rate": 4.794587943012e-05,
"loss": 0.7171,
"step": 8700
},
{
"ar_loss": 0.5278,
"epoch": 0.5714285714285714,
"fm_loss": 0.1738,
"grad_norm": 2.0422327518463135,
"learning_rate": 4.7898692595866415e-05,
"loss": 0.7016,
"step": 8800
},
{
"ar_loss": 0.5299,
"epoch": 0.5864661654135338,
"fm_loss": 0.1729,
"grad_norm": 1.643233060836792,
"learning_rate": 4.785099367680317e-05,
"loss": 0.7028,
"step": 8900
},
{
"ar_loss": 0.5279,
"epoch": 0.6015037593984962,
"fm_loss": 0.173,
"grad_norm": 1.7165354490280151,
"learning_rate": 4.7802783739624e-05,
"loss": 0.7009,
"step": 9000
},
{
"ar_loss": 0.5297,
"epoch": 0.6165413533834586,
"fm_loss": 0.181,
"grad_norm": 1.632948875427246,
"learning_rate": 4.7754063862450576e-05,
"loss": 0.7107,
"step": 9100
},
{
"ar_loss": 0.5266,
"epoch": 0.631578947368421,
"fm_loss": 0.2008,
"grad_norm": 2.7003791332244873,
"learning_rate": 4.770483513480837e-05,
"loss": 0.7274,
"step": 9200
},
{
"ar_loss": 0.5304,
"epoch": 0.6466165413533834,
"fm_loss": 0.1754,
"grad_norm": 2.2766501903533936,
"learning_rate": 4.765509865760233e-05,
"loss": 0.7057,
"step": 9300
},
{
"ar_loss": 0.5262,
"epoch": 0.6616541353383458,
"fm_loss": 0.18,
"grad_norm": 10.034100532531738,
"learning_rate": 4.760485554309219e-05,
"loss": 0.7061,
"step": 9400
},
{
"ar_loss": 0.52,
"epoch": 0.6766917293233082,
"fm_loss": 0.1753,
"grad_norm": 1.70254385471344,
"learning_rate": 4.7554106914867705e-05,
"loss": 0.6953,
"step": 9500
},
{
"ar_loss": 0.5249,
"epoch": 0.6917293233082706,
"fm_loss": 0.1744,
"grad_norm": 1.7009040117263794,
"learning_rate": 4.750285390782342e-05,
"loss": 0.6992,
"step": 9600
},
{
"ar_loss": 0.523,
"epoch": 0.706766917293233,
"fm_loss": 0.1739,
"grad_norm": 1.4413669109344482,
"learning_rate": 4.745109766813334e-05,
"loss": 0.6968,
"step": 9700
},
{
"ar_loss": 0.5242,
"epoch": 0.7218045112781954,
"fm_loss": 0.1727,
"grad_norm": 1.4672104120254517,
"learning_rate": 4.739883935322532e-05,
"loss": 0.6969,
"step": 9800
},
{
"ar_loss": 0.5244,
"epoch": 0.7368421052631579,
"fm_loss": 0.1728,
"grad_norm": 1.458977222442627,
"learning_rate": 4.734608013175512e-05,
"loss": 0.6972,
"step": 9900
},
{
"ar_loss": 0.5247,
"epoch": 0.7518796992481203,
"fm_loss": 0.1721,
"grad_norm": 2.412842273712158,
"learning_rate": 4.72928211835803e-05,
"loss": 0.6968,
"step": 10000
},
{
"ar_loss": 0.5206,
"epoch": 0.7669172932330827,
"fm_loss": 0.172,
"grad_norm": 2.1207447052001953,
"learning_rate": 4.723906369973386e-05,
"loss": 0.6926,
"step": 10100
},
{
"ar_loss": 0.5214,
"epoch": 0.7819548872180451,
"fm_loss": 0.1735,
"grad_norm": 2.6220345497131348,
"learning_rate": 4.7184808882397594e-05,
"loss": 0.695,
"step": 10200
},
{
"ar_loss": 0.5236,
"epoch": 0.7969924812030075,
"fm_loss": 0.1819,
"grad_norm": 1.9105595350265503,
"learning_rate": 4.713005794487515e-05,
"loss": 0.7055,
"step": 10300
},
{
"ar_loss": 0.5202,
"epoch": 0.8120300751879699,
"fm_loss": 0.1718,
"grad_norm": 2.312239646911621,
"learning_rate": 4.707481211156497e-05,
"loss": 0.692,
"step": 10400
},
{
"ar_loss": 0.5207,
"epoch": 0.8270676691729323,
"fm_loss": 0.1788,
"grad_norm": 1.709987759590149,
"learning_rate": 4.701907261793287e-05,
"loss": 0.6995,
"step": 10500
},
{
"ar_loss": 0.5178,
"epoch": 0.8421052631578947,
"fm_loss": 0.172,
"grad_norm": 2.644814968109131,
"learning_rate": 4.696284071048444e-05,
"loss": 0.6898,
"step": 10600
},
{
"ar_loss": 0.5175,
"epoch": 0.8571428571428571,
"fm_loss": 0.1737,
"grad_norm": 1.4387990236282349,
"learning_rate": 4.690611764673713e-05,
"loss": 0.6912,
"step": 10700
},
{
"ar_loss": 0.5154,
"epoch": 0.8721804511278195,
"fm_loss": 0.1727,
"grad_norm": 1.308214545249939,
"learning_rate": 4.684890469519213e-05,
"loss": 0.6881,
"step": 10800
},
{
"ar_loss": 0.5158,
"epoch": 0.8872180451127819,
"fm_loss": 0.1707,
"grad_norm": 3.754106283187866,
"learning_rate": 4.6791203135306075e-05,
"loss": 0.6865,
"step": 10900
},
{
"ar_loss": 0.5213,
"epoch": 0.9022556390977443,
"fm_loss": 0.1709,
"grad_norm": 1.5025776624679565,
"learning_rate": 4.673301425746232e-05,
"loss": 0.6922,
"step": 11000
},
{
"ar_loss": 0.5129,
"epoch": 0.9172932330827067,
"fm_loss": 0.1705,
"grad_norm": 1.8450067043304443,
"learning_rate": 4.667433936294217e-05,
"loss": 0.6835,
"step": 11100
},
{
"ar_loss": 0.5176,
"epoch": 0.9323308270676691,
"fm_loss": 0.1768,
"grad_norm": 1.772120475769043,
"learning_rate": 4.661517976389574e-05,
"loss": 0.6944,
"step": 11200
},
{
"ar_loss": 0.5117,
"epoch": 0.9473684210526315,
"fm_loss": 0.1722,
"grad_norm": 1.8486195802688599,
"learning_rate": 4.6555536783312634e-05,
"loss": 0.6839,
"step": 11300
},
{
"ar_loss": 0.5139,
"epoch": 0.9624060150375939,
"fm_loss": 0.1705,
"grad_norm": 3.964401960372925,
"learning_rate": 4.649541175499232e-05,
"loss": 0.6844,
"step": 11400
},
{
"ar_loss": 0.5154,
"epoch": 0.9774436090225563,
"fm_loss": 0.1923,
"grad_norm": 2.2421326637268066,
"learning_rate": 4.6434806023514354e-05,
"loss": 0.7077,
"step": 11500
},
{
"ar_loss": 0.5147,
"epoch": 0.9924812030075187,
"fm_loss": 0.171,
"grad_norm": 1.773090124130249,
"learning_rate": 4.6373720944208275e-05,
"loss": 0.6856,
"step": 11600
},
{
"ar_loss": 0.4967,
"epoch": 1.0075187969924813,
"fm_loss": 0.1708,
"grad_norm": 2.0621070861816406,
"learning_rate": 4.631215788312331e-05,
"loss": 0.6675,
"step": 11700
},
{
"ar_loss": 0.4895,
"epoch": 1.0225563909774436,
"fm_loss": 0.1706,
"grad_norm": 1.7303022146224976,
"learning_rate": 4.6250118216997795e-05,
"loss": 0.6601,
"step": 11800
},
{
"ar_loss": 0.4896,
"epoch": 1.037593984962406,
"fm_loss": 0.1688,
"grad_norm": 2.481395959854126,
"learning_rate": 4.618760333322846e-05,
"loss": 0.6584,
"step": 11900
},
{
"ar_loss": 0.484,
"epoch": 1.0526315789473684,
"fm_loss": 0.1686,
"grad_norm": 8.144444465637207,
"learning_rate": 4.61246146298393e-05,
"loss": 0.6527,
"step": 12000
},
{
"ar_loss": 0.4851,
"epoch": 1.0676691729323309,
"fm_loss": 0.1695,
"grad_norm": 4.188894271850586,
"learning_rate": 4.606115351545043e-05,
"loss": 0.6546,
"step": 12100
},
{
"ar_loss": 0.4853,
"epoch": 1.0827067669172932,
"fm_loss": 0.1685,
"grad_norm": 2.112070083618164,
"learning_rate": 4.599722140924645e-05,
"loss": 0.6538,
"step": 12200
},
{
"ar_loss": 0.4862,
"epoch": 1.0977443609022557,
"fm_loss": 0.1684,
"grad_norm": 5.455174922943115,
"learning_rate": 4.593281974094483e-05,
"loss": 0.6547,
"step": 12300
},
{
"ar_loss": 0.4887,
"epoch": 1.112781954887218,
"fm_loss": 0.1682,
"grad_norm": 2.762221574783325,
"learning_rate": 4.5867949950763864e-05,
"loss": 0.6569,
"step": 12400
},
{
"ar_loss": 0.4829,
"epoch": 1.1278195488721805,
"fm_loss": 0.1683,
"grad_norm": 1.505560278892517,
"learning_rate": 4.5802613489390487e-05,
"loss": 0.6511,
"step": 12500
},
{
"ar_loss": 0.487,
"epoch": 1.1428571428571428,
"fm_loss": 0.1686,
"grad_norm": 12.36954402923584,
"learning_rate": 4.5736811817947824e-05,
"loss": 0.6557,
"step": 12600
},
{
"ar_loss": 0.4796,
"epoch": 1.1578947368421053,
"fm_loss": 0.1675,
"grad_norm": 1.7835297584533691,
"learning_rate": 4.5670546407962525e-05,
"loss": 0.6471,
"step": 12700
},
{
"ar_loss": 0.4841,
"epoch": 1.1729323308270676,
"fm_loss": 0.1685,
"grad_norm": 1.500798225402832,
"learning_rate": 4.560381874133186e-05,
"loss": 0.6526,
"step": 12800
},
{
"ar_loss": 0.4818,
"epoch": 1.1879699248120301,
"fm_loss": 0.1682,
"grad_norm": 3.4301960468292236,
"learning_rate": 4.553663031029055e-05,
"loss": 0.65,
"step": 12900
},
{
"ar_loss": 0.4821,
"epoch": 1.2030075187969924,
"fm_loss": 0.1699,
"grad_norm": 1.9192440509796143,
"learning_rate": 4.546898261737745e-05,
"loss": 0.6519,
"step": 13000
},
{
"ar_loss": 0.4854,
"epoch": 1.218045112781955,
"fm_loss": 0.1688,
"grad_norm": 2.5409817695617676,
"learning_rate": 4.540087717540188e-05,
"loss": 0.6542,
"step": 13100
},
{
"ar_loss": 0.4807,
"epoch": 1.2330827067669172,
"fm_loss": 0.1684,
"grad_norm": 1.6167821884155273,
"learning_rate": 4.533231550740985e-05,
"loss": 0.6491,
"step": 13200
},
{
"ar_loss": 0.4793,
"epoch": 1.2481203007518797,
"fm_loss": 0.1673,
"grad_norm": 2.658431053161621,
"learning_rate": 4.526329914664999e-05,
"loss": 0.6466,
"step": 13300
},
{
"ar_loss": 0.4802,
"epoch": 1.263157894736842,
"fm_loss": 0.1678,
"grad_norm": 1.7002625465393066,
"learning_rate": 4.51938296365392e-05,
"loss": 0.6481,
"step": 13400
},
{
"ar_loss": 0.4799,
"epoch": 1.2781954887218046,
"fm_loss": 0.1694,
"grad_norm": 2.5322110652923584,
"learning_rate": 4.5123908530628254e-05,
"loss": 0.6493,
"step": 13500
},
{
"ar_loss": 0.4807,
"epoch": 1.2932330827067668,
"fm_loss": 0.1674,
"grad_norm": 1.8980706930160522,
"learning_rate": 4.5053537392566946e-05,
"loss": 0.6481,
"step": 13600
},
{
"ar_loss": 0.4818,
"epoch": 1.3082706766917294,
"fm_loss": 0.1688,
"grad_norm": 1.619292140007019,
"learning_rate": 4.4982717796069176e-05,
"loss": 0.6507,
"step": 13700
},
{
"ar_loss": 0.4841,
"epoch": 1.3233082706766917,
"fm_loss": 0.1684,
"grad_norm": 1.7827472686767578,
"learning_rate": 4.491145132487775e-05,
"loss": 0.6526,
"step": 13800
},
{
"ar_loss": 0.4794,
"epoch": 1.3383458646616542,
"fm_loss": 0.1779,
"grad_norm": 2.303715944290161,
"learning_rate": 4.483973957272895e-05,
"loss": 0.6572,
"step": 13900
},
{
"ar_loss": 0.4729,
"epoch": 1.3533834586466165,
"fm_loss": 0.1767,
"grad_norm": 2.095193386077881,
"learning_rate": 4.476758414331691e-05,
"loss": 0.6496,
"step": 14000
},
{
"ar_loss": 0.4765,
"epoch": 1.368421052631579,
"fm_loss": 0.1679,
"grad_norm": 3.301593065261841,
"learning_rate": 4.4694986650257754e-05,
"loss": 0.6445,
"step": 14100
},
{
"ar_loss": 0.4787,
"epoch": 1.3834586466165413,
"fm_loss": 0.1675,
"grad_norm": 4.087191581726074,
"learning_rate": 4.462194871705347e-05,
"loss": 0.6462,
"step": 14200
},
{
"ar_loss": 0.4788,
"epoch": 1.3984962406015038,
"fm_loss": 0.1666,
"grad_norm": 1.5592633485794067,
"learning_rate": 4.4548471977055665e-05,
"loss": 0.6455,
"step": 14300
},
{
"ar_loss": 0.4747,
"epoch": 1.413533834586466,
"fm_loss": 0.1682,
"grad_norm": 7.437148571014404,
"learning_rate": 4.447455807342901e-05,
"loss": 0.6429,
"step": 14400
},
{
"ar_loss": 0.4798,
"epoch": 1.4285714285714286,
"fm_loss": 0.1674,
"grad_norm": 1.5794994831085205,
"learning_rate": 4.440020865911446e-05,
"loss": 0.6472,
"step": 14500
},
{
"ar_loss": 0.4729,
"epoch": 1.443609022556391,
"fm_loss": 0.1674,
"grad_norm": 2.327096700668335,
"learning_rate": 4.432542539679235e-05,
"loss": 0.6404,
"step": 14600
},
{
"ar_loss": 0.4711,
"epoch": 1.4586466165413534,
"fm_loss": 0.1699,
"grad_norm": 1.5044869184494019,
"learning_rate": 4.425020995884517e-05,
"loss": 0.641,
"step": 14700
},
{
"ar_loss": 0.4757,
"epoch": 1.4736842105263157,
"fm_loss": 0.1678,
"grad_norm": 1.8027641773223877,
"learning_rate": 4.41745640273202e-05,
"loss": 0.6435,
"step": 14800
},
{
"ar_loss": 0.4727,
"epoch": 1.4887218045112782,
"fm_loss": 0.1716,
"grad_norm": 1.5030885934829712,
"learning_rate": 4.4098489293891845e-05,
"loss": 0.6443,
"step": 14900
},
{
"ar_loss": 0.4741,
"epoch": 1.5037593984962405,
"fm_loss": 0.1678,
"grad_norm": 2.254826068878174,
"learning_rate": 4.4021987459823834e-05,
"loss": 0.6419,
"step": 15000
},
{
"ar_loss": 0.4636,
"epoch": 2.015368065160596,
"fm_loss": 0.1667,
"grad_norm": 5.636004447937012,
"learning_rate": 4.3687046036407485e-05,
"loss": 0.6303,
"step": 15100
},
{
"ar_loss": 0.4622,
"epoch": 2.0307361303211926,
"fm_loss": 0.1678,
"grad_norm": 2.2144827842712402,
"learning_rate": 4.360656750389484e-05,
"loss": 0.63,
"step": 15200
},
{
"ar_loss": 0.4643,
"epoch": 2.0461041954817887,
"fm_loss": 0.1659,
"grad_norm": 1.623849868774414,
"learning_rate": 4.3525654376107785e-05,
"loss": 0.6302,
"step": 15300
},
{
"ar_loss": 0.4617,
"epoch": 2.061472260642385,
"fm_loss": 0.1671,
"grad_norm": 10.88976001739502,
"learning_rate": 4.344430854294155e-05,
"loss": 0.6288,
"step": 15400
},
{
"ar_loss": 0.4622,
"epoch": 2.0768403258029813,
"fm_loss": 0.1679,
"grad_norm": 2.079127550125122,
"learning_rate": 4.3362531904398086e-05,
"loss": 0.6301,
"step": 15500
},
{
"ar_loss": 0.4593,
"epoch": 2.092208390963578,
"fm_loss": 0.1663,
"grad_norm": 3.9787468910217285,
"learning_rate": 4.3280326370541716e-05,
"loss": 0.6256,
"step": 15600
},
{
"ar_loss": 0.4606,
"epoch": 2.107576456124174,
"fm_loss": 0.1686,
"grad_norm": 2.992798089981079,
"learning_rate": 4.3197693861454505e-05,
"loss": 0.6292,
"step": 15700
},
{
"ar_loss": 0.4612,
"epoch": 2.1229445212847704,
"fm_loss": 0.1693,
"grad_norm": 1.920535922050476,
"learning_rate": 4.31146363071914e-05,
"loss": 0.6305,
"step": 15800
},
{
"ar_loss": 0.4626,
"epoch": 2.1383125864453665,
"fm_loss": 0.1793,
"grad_norm": 1.652984380722046,
"learning_rate": 4.303115564773521e-05,
"loss": 0.6419,
"step": 15900
},
{
"ar_loss": 0.4583,
"epoch": 2.153680651605963,
"fm_loss": 0.1665,
"grad_norm": 1.9141411781311035,
"learning_rate": 4.294725383295121e-05,
"loss": 0.6248,
"step": 16000
},
{
"ar_loss": 0.4646,
"epoch": 2.169048716766559,
"fm_loss": 0.1762,
"grad_norm": 1.7625855207443237,
"learning_rate": 4.286293282254165e-05,
"loss": 0.6407,
"step": 16100
},
{
"ar_loss": 0.4622,
"epoch": 2.1844167819271556,
"fm_loss": 0.1674,
"grad_norm": 1.4725229740142822,
"learning_rate": 4.2778194585999965e-05,
"loss": 0.6295,
"step": 16200
},
{
"ar_loss": 0.4642,
"epoch": 2.1997848470877517,
"fm_loss": 0.1654,
"grad_norm": 2.979617118835449,
"learning_rate": 4.269304110256479e-05,
"loss": 0.6296,
"step": 16300
},
{
"ar_loss": 0.4605,
"epoch": 2.2151529122483478,
"fm_loss": 0.1656,
"grad_norm": 4.4237470626831055,
"learning_rate": 4.2607474361173714e-05,
"loss": 0.6262,
"step": 16400
},
{
"ar_loss": 0.4597,
"epoch": 2.2305209774089443,
"fm_loss": 0.1673,
"grad_norm": 3.3088274002075195,
"learning_rate": 4.2521496360416834e-05,
"loss": 0.627,
"step": 16500
},
{
"ar_loss": 0.4588,
"epoch": 2.2458890425695404,
"fm_loss": 0.166,
"grad_norm": 2.4934027194976807,
"learning_rate": 4.243510910849006e-05,
"loss": 0.6248,
"step": 16600
},
{
"ar_loss": 0.4577,
"epoch": 2.261257107730137,
"fm_loss": 0.1654,
"grad_norm": 3.4698071479797363,
"learning_rate": 4.234831462314822e-05,
"loss": 0.6231,
"step": 16700
},
{
"ar_loss": 0.459,
"epoch": 2.276625172890733,
"fm_loss": 0.1663,
"grad_norm": 1.6527596712112427,
"learning_rate": 4.226111493165793e-05,
"loss": 0.6253,
"step": 16800
},
{
"ar_loss": 0.4563,
"epoch": 2.2919932380513295,
"fm_loss": 0.1647,
"grad_norm": 2.62147855758667,
"learning_rate": 4.217351207075024e-05,
"loss": 0.621,
"step": 16900
},
{
"ar_loss": 0.4583,
"epoch": 2.3073613032119256,
"fm_loss": 0.1651,
"grad_norm": 2.403658151626587,
"learning_rate": 4.208550808657309e-05,
"loss": 0.6234,
"step": 17000
},
{
"ar_loss": 0.4565,
"epoch": 2.322729368372522,
"fm_loss": 0.1651,
"grad_norm": 2.243856906890869,
"learning_rate": 4.199710503464345e-05,
"loss": 0.6216,
"step": 17100
},
{
"ar_loss": 0.456,
"epoch": 2.338097433533118,
"fm_loss": 0.1651,
"grad_norm": 2.199291229248047,
"learning_rate": 4.190830497979938e-05,
"loss": 0.6211,
"step": 17200
},
{
"ar_loss": 0.4532,
"epoch": 2.3534654986937147,
"fm_loss": 0.1656,
"grad_norm": 2.230714797973633,
"learning_rate": 4.1819109996151775e-05,
"loss": 0.6188,
"step": 17300
},
{
"ar_loss": 0.4526,
"epoch": 2.3688335638543108,
"fm_loss": 0.1651,
"grad_norm": 2.124840259552002,
"learning_rate": 4.172952216703588e-05,
"loss": 0.6176,
"step": 17400
},
{
"ar_loss": 0.4502,
"epoch": 2.384201629014907,
"fm_loss": 0.165,
"grad_norm": 2.1426265239715576,
"learning_rate": 4.1639543584962726e-05,
"loss": 0.6152,
"step": 17500
},
{
"ar_loss": 0.4542,
"epoch": 2.3995696941755034,
"fm_loss": 0.1651,
"grad_norm": 4.364583492279053,
"learning_rate": 4.154917635157015e-05,
"loss": 0.6193,
"step": 17600
},
{
"ar_loss": 0.4558,
"epoch": 2.4149377593360994,
"fm_loss": 0.1651,
"grad_norm": 1.8425495624542236,
"learning_rate": 4.145842257757377e-05,
"loss": 0.621,
"step": 17700
},
{
"ar_loss": 0.4533,
"epoch": 2.430305824496696,
"fm_loss": 0.1657,
"grad_norm": 1.7966125011444092,
"learning_rate": 4.136728438271768e-05,
"loss": 0.619,
"step": 17800
},
{
"ar_loss": 0.4493,
"epoch": 2.445673889657292,
"fm_loss": 0.1651,
"grad_norm": 2.5523791313171387,
"learning_rate": 4.127576389572488e-05,
"loss": 0.6144,
"step": 17900
},
{
"ar_loss": 0.4501,
"epoch": 2.4610419548178886,
"fm_loss": 0.1647,
"grad_norm": 2.2898178100585938,
"learning_rate": 4.1183863254247655e-05,
"loss": 0.6148,
"step": 18000
},
{
"ar_loss": 0.4509,
"epoch": 2.4764100199784846,
"fm_loss": 0.1635,
"grad_norm": 2.085273504257202,
"learning_rate": 4.109158460481758e-05,
"loss": 0.6144,
"step": 18100
},
{
"ar_loss": 0.4515,
"epoch": 2.491778085139081,
"fm_loss": 0.1651,
"grad_norm": 2.7309823036193848,
"learning_rate": 4.0998930102795377e-05,
"loss": 0.6166,
"step": 18200
},
{
"ar_loss": 0.4491,
"epoch": 2.5071461502996772,
"fm_loss": 0.1645,
"grad_norm": 1.9515680074691772,
"learning_rate": 4.090590191232061e-05,
"loss": 0.6137,
"step": 18300
},
{
"ar_loss": 0.4541,
"epoch": 2.5225142154602738,
"fm_loss": 0.1639,
"grad_norm": 3.2586522102355957,
"learning_rate": 4.0812502206261096e-05,
"loss": 0.618,
"step": 18400
},
{
"ar_loss": 0.4463,
"epoch": 2.53788228062087,
"fm_loss": 0.1645,
"grad_norm": 3.2903804779052734,
"learning_rate": 4.071873316616219e-05,
"loss": 0.6108,
"step": 18500
},
{
"ar_loss": 0.4465,
"epoch": 2.553250345781466,
"fm_loss": 0.1659,
"grad_norm": 3.0805041790008545,
"learning_rate": 4.062459698219583e-05,
"loss": 0.6124,
"step": 18600
},
{
"ar_loss": 0.4476,
"epoch": 2.5686184109420624,
"fm_loss": 0.1643,
"grad_norm": 10.558906555175781,
"learning_rate": 4.053009585310933e-05,
"loss": 0.6119,
"step": 18700
},
{
"ar_loss": 0.4461,
"epoch": 2.5839864761026585,
"fm_loss": 0.164,
"grad_norm": 1.7276713848114014,
"learning_rate": 4.04352319861741e-05,
"loss": 0.6102,
"step": 18800
},
{
"ar_loss": 0.4486,
"epoch": 2.599354541263255,
"fm_loss": 0.1655,
"grad_norm": 2.4759597778320312,
"learning_rate": 4.034000759713401e-05,
"loss": 0.6142,
"step": 18900
},
{
"ar_loss": 0.4461,
"epoch": 2.614722606423851,
"fm_loss": 0.164,
"grad_norm": 1.5683785676956177,
"learning_rate": 4.024442491015372e-05,
"loss": 0.6101,
"step": 19000
},
{
"ar_loss": 0.4495,
"epoch": 2.6300906715844476,
"fm_loss": 0.1637,
"grad_norm": 3.866807460784912,
"learning_rate": 4.014848615776666e-05,
"loss": 0.6132,
"step": 19100
},
{
"ar_loss": 0.447,
"epoch": 2.6454587367450437,
"fm_loss": 0.1635,
"grad_norm": 1.7105318307876587,
"learning_rate": 4.00521935808229e-05,
"loss": 0.6105,
"step": 19200
},
{
"ar_loss": 0.4454,
"epoch": 2.66082680190564,
"fm_loss": 0.1634,
"grad_norm": 2.6716930866241455,
"learning_rate": 3.995554942843687e-05,
"loss": 0.6088,
"step": 19300
},
{
"ar_loss": 0.4491,
"epoch": 2.6761948670662363,
"fm_loss": 0.1647,
"grad_norm": 1.8674702644348145,
"learning_rate": 3.9858555957934715e-05,
"loss": 0.6137,
"step": 19400
},
{
"ar_loss": 0.445,
"epoch": 2.691562932226833,
"fm_loss": 0.1631,
"grad_norm": 2.926995277404785,
"learning_rate": 3.976121543480169e-05,
"loss": 0.6081,
"step": 19500
},
{
"ar_loss": 0.4416,
"epoch": 2.706930997387429,
"fm_loss": 0.1641,
"grad_norm": 2.341655731201172,
"learning_rate": 3.966353013262917e-05,
"loss": 0.6057,
"step": 19600
},
{
"ar_loss": 0.4406,
"epoch": 2.722299062548025,
"fm_loss": 0.1648,
"grad_norm": 1.6259019374847412,
"learning_rate": 3.956550233306155e-05,
"loss": 0.6054,
"step": 19700
},
{
"ar_loss": 0.4439,
"epoch": 2.7376671277086215,
"fm_loss": 0.1626,
"grad_norm": 3.2317330837249756,
"learning_rate": 3.946713432574299e-05,
"loss": 0.6065,
"step": 19800
},
{
"ar_loss": 0.4445,
"epoch": 2.7530351928692176,
"fm_loss": 0.1636,
"grad_norm": 2.053748369216919,
"learning_rate": 3.936842840826391e-05,
"loss": 0.6081,
"step": 19900
},
{
"ar_loss": 0.444,
"epoch": 2.768403258029814,
"fm_loss": 0.1635,
"grad_norm": 3.10992431640625,
"learning_rate": 3.9269386886107304e-05,
"loss": 0.6076,
"step": 20000
},
{
"ar_loss": 0.4431,
"epoch": 2.78377132319041,
"fm_loss": 0.1634,
"grad_norm": 1.6531990766525269,
"learning_rate": 3.9170012072594944e-05,
"loss": 0.6065,
"step": 20100
},
{
"ar_loss": 0.4413,
"epoch": 2.7991393883510067,
"fm_loss": 0.1642,
"grad_norm": 2.239360809326172,
"learning_rate": 3.90703062888333e-05,
"loss": 0.6055,
"step": 20200
},
{
"ar_loss": 0.4411,
"epoch": 2.814507453511603,
"fm_loss": 0.1635,
"grad_norm": 2.6573100090026855,
"learning_rate": 3.8970271863659366e-05,
"loss": 0.6046,
"step": 20300
},
{
"ar_loss": 0.4388,
"epoch": 2.8298755186721993,
"fm_loss": 0.1639,
"grad_norm": 2.9059882164001465,
"learning_rate": 3.886991113358621e-05,
"loss": 0.6027,
"step": 20400
},
{
"ar_loss": 0.4413,
"epoch": 2.8452435838327954,
"fm_loss": 0.1647,
"grad_norm": 1.4836231470108032,
"learning_rate": 3.876922644274847e-05,
"loss": 0.6061,
"step": 20500
},
{
"ar_loss": 0.4394,
"epoch": 2.860611648993392,
"fm_loss": 0.1639,
"grad_norm": 3.667249917984009,
"learning_rate": 3.866822014284753e-05,
"loss": 0.6033,
"step": 20600
},
{
"ar_loss": 0.437,
"epoch": 2.875979714153988,
"fm_loss": 0.1631,
"grad_norm": 2.724766731262207,
"learning_rate": 3.8566894593096646e-05,
"loss": 0.6001,
"step": 20700
},
{
"ar_loss": 0.4408,
"epoch": 2.891347779314584,
"fm_loss": 0.1634,
"grad_norm": 3.302048683166504,
"learning_rate": 3.846525216016581e-05,
"loss": 0.6043,
"step": 20800
},
{
"ar_loss": 0.4374,
"epoch": 2.9067158444751806,
"fm_loss": 0.1632,
"grad_norm": 5.427467346191406,
"learning_rate": 3.836329521812651e-05,
"loss": 0.6006,
"step": 20900
},
{
"ar_loss": 0.4404,
"epoch": 2.922083909635777,
"fm_loss": 0.1632,
"grad_norm": 2.2337872982025146,
"learning_rate": 3.826102614839621e-05,
"loss": 0.6037,
"step": 21000
},
{
"ar_loss": 0.4384,
"epoch": 2.937451974796373,
"fm_loss": 0.1629,
"grad_norm": 13.053716659545898,
"learning_rate": 3.815844733968281e-05,
"loss": 0.6014,
"step": 21100
},
{
"ar_loss": 0.4374,
"epoch": 2.9528200399569693,
"fm_loss": 0.1623,
"grad_norm": 2.655907392501831,
"learning_rate": 3.8055561187928776e-05,
"loss": 0.5997,
"step": 21200
},
{
"ar_loss": 0.4344,
"epoch": 2.968188105117566,
"fm_loss": 0.163,
"grad_norm": 4.967069625854492,
"learning_rate": 3.795237009625523e-05,
"loss": 0.5974,
"step": 21300
},
{
"ar_loss": 0.4384,
"epoch": 2.983556170278162,
"fm_loss": 0.1639,
"grad_norm": 1.5047295093536377,
"learning_rate": 3.784887647490581e-05,
"loss": 0.6023,
"step": 21400
},
{
"ar_loss": 0.435,
"epoch": 2.9989242354387584,
"fm_loss": 0.1626,
"grad_norm": 2.5669796466827393,
"learning_rate": 3.774508274119035e-05,
"loss": 0.5976,
"step": 21500
},
{
"ar_loss": 0.4195,
"epoch": 3.0142923005993545,
"fm_loss": 0.1629,
"grad_norm": 2.004462718963623,
"learning_rate": 3.764099131942846e-05,
"loss": 0.5825,
"step": 21600
},
{
"ar_loss": 0.4153,
"epoch": 3.029660365759951,
"fm_loss": 0.1619,
"grad_norm": 4.897035121917725,
"learning_rate": 3.753660464089285e-05,
"loss": 0.5773,
"step": 21700
},
{
"ar_loss": 0.414,
"epoch": 3.045028430920547,
"fm_loss": 0.1628,
"grad_norm": 5.358692169189453,
"learning_rate": 3.743192514375257e-05,
"loss": 0.5768,
"step": 21800
},
{
"ar_loss": 0.417,
"epoch": 3.060396496081143,
"fm_loss": 0.1624,
"grad_norm": 2.0926132202148438,
"learning_rate": 3.732695527301609e-05,
"loss": 0.5793,
"step": 21900
},
{
"ar_loss": 0.4168,
"epoch": 3.0757645612417397,
"fm_loss": 0.1626,
"grad_norm": 2.1865603923797607,
"learning_rate": 3.722169748047413e-05,
"loss": 0.5793,
"step": 22000
},
{
"ar_loss": 0.4142,
"epoch": 3.0911326264023358,
"fm_loss": 0.163,
"grad_norm": 1.9334361553192139,
"learning_rate": 3.711615422464244e-05,
"loss": 0.5772,
"step": 22100
},
{
"ar_loss": 0.4156,
"epoch": 3.1065006915629323,
"fm_loss": 0.163,
"grad_norm": 2.2630386352539062,
"learning_rate": 3.701032797070436e-05,
"loss": 0.5785,
"step": 22200
},
{
"ar_loss": 0.4122,
"epoch": 3.1218687567235284,
"fm_loss": 0.1706,
"grad_norm": 1.959439754486084,
"learning_rate": 3.690422119045325e-05,
"loss": 0.5828,
"step": 22300
},
{
"ar_loss": 0.4142,
"epoch": 3.137236821884125,
"fm_loss": 0.1717,
"grad_norm": 2.2783379554748535,
"learning_rate": 3.6797836362234745e-05,
"loss": 0.5859,
"step": 22400
},
{
"ar_loss": 0.4142,
"epoch": 3.152604887044721,
"fm_loss": 0.1634,
"grad_norm": 2.1327381134033203,
"learning_rate": 3.669117597088885e-05,
"loss": 0.5776,
"step": 22500
},
{
"ar_loss": 0.4125,
"epoch": 3.1679729522053175,
"fm_loss": 0.1665,
"grad_norm": 2.315673828125,
"learning_rate": 3.658424250769195e-05,
"loss": 0.579,
"step": 22600
},
{
"ar_loss": 0.4148,
"epoch": 3.1833410173659136,
"fm_loss": 0.1629,
"grad_norm": 3.1342201232910156,
"learning_rate": 3.647703847029858e-05,
"loss": 0.5778,
"step": 22700
},
{
"ar_loss": 0.4164,
"epoch": 3.19870908252651,
"fm_loss": 0.1615,
"grad_norm": 2.276103973388672,
"learning_rate": 3.6369566362683115e-05,
"loss": 0.5778,
"step": 22800
},
{
"ar_loss": 0.415,
"epoch": 3.214077147687106,
"fm_loss": 0.1634,
"grad_norm": 3.8148179054260254,
"learning_rate": 3.626182869508124e-05,
"loss": 0.5784,
"step": 22900
},
{
"ar_loss": 0.4135,
"epoch": 3.2294452128477023,
"fm_loss": 0.163,
"grad_norm": 2.029802083969116,
"learning_rate": 3.6153827983931395e-05,
"loss": 0.5765,
"step": 23000
},
{
"ar_loss": 0.4108,
"epoch": 3.2448132780082988,
"fm_loss": 0.1616,
"grad_norm": 1.9012216329574585,
"learning_rate": 3.6045566751815906e-05,
"loss": 0.5724,
"step": 23100
},
{
"ar_loss": 0.4118,
"epoch": 3.260181343168895,
"fm_loss": 0.1631,
"grad_norm": 1.9454518556594849,
"learning_rate": 3.593704752740214e-05,
"loss": 0.5749,
"step": 23200
},
{
"ar_loss": 0.4114,
"epoch": 3.2755494083294914,
"fm_loss": 0.1646,
"grad_norm": 5.499399662017822,
"learning_rate": 3.5828272845383395e-05,
"loss": 0.576,
"step": 23300
},
{
"ar_loss": 0.4099,
"epoch": 3.2909174734900875,
"fm_loss": 0.1622,
"grad_norm": 2.5215775966644287,
"learning_rate": 3.571924524641973e-05,
"loss": 0.5721,
"step": 23400
},
{
"ar_loss": 0.4132,
"epoch": 3.306285538650684,
"fm_loss": 0.1619,
"grad_norm": 1.9290796518325806,
"learning_rate": 3.56099672770786e-05,
"loss": 0.575,
"step": 23500
},
{
"ar_loss": 0.4106,
"epoch": 3.32165360381128,
"fm_loss": 0.1653,
"grad_norm": 3.0975518226623535,
"learning_rate": 3.550044148977539e-05,
"loss": 0.5759,
"step": 23600
},
{
"ar_loss": 0.4109,
"epoch": 3.3370216689718766,
"fm_loss": 0.1625,
"grad_norm": 3.079251289367676,
"learning_rate": 3.539067044271378e-05,
"loss": 0.5734,
"step": 23700
},
{
"ar_loss": 0.4106,
"epoch": 3.3523897341324727,
"fm_loss": 0.162,
"grad_norm": 3.6910111904144287,
"learning_rate": 3.5280656699826016e-05,
"loss": 0.5726,
"step": 23800
},
{
"ar_loss": 0.4081,
"epoch": 3.367757799293069,
"fm_loss": 0.1616,
"grad_norm": 2.8438339233398438,
"learning_rate": 3.5170402830713004e-05,
"loss": 0.5698,
"step": 23900
},
{
"ar_loss": 0.4122,
"epoch": 3.3831258644536653,
"fm_loss": 0.1616,
"grad_norm": 5.298975467681885,
"learning_rate": 3.505991141058431e-05,
"loss": 0.5738,
"step": 24000
},
{
"ar_loss": 0.4107,
"epoch": 3.3984939296142613,
"fm_loss": 0.1614,
"grad_norm": 2.1549863815307617,
"learning_rate": 3.494918502019798e-05,
"loss": 0.5721,
"step": 24100
},
{
"ar_loss": 0.4095,
"epoch": 3.413861994774858,
"fm_loss": 0.1615,
"grad_norm": 2.1645219326019287,
"learning_rate": 3.483822624580031e-05,
"loss": 0.571,
"step": 24200
},
{
"ar_loss": 0.4102,
"epoch": 3.429230059935454,
"fm_loss": 0.1623,
"grad_norm": 3.294847249984741,
"learning_rate": 3.472703767906539e-05,
"loss": 0.5724,
"step": 24300
},
{
"ar_loss": 0.4116,
"epoch": 3.4445981250960505,
"fm_loss": 0.1628,
"grad_norm": 2.6318018436431885,
"learning_rate": 3.461562191703459e-05,
"loss": 0.5744,
"step": 24400
},
{
"ar_loss": 0.4116,
"epoch": 3.4599661902566465,
"fm_loss": 0.161,
"grad_norm": 4.470794677734375,
"learning_rate": 3.450398156205592e-05,
"loss": 0.5726,
"step": 24500
},
{
"ar_loss": 0.4101,
"epoch": 3.475334255417243,
"fm_loss": 0.1609,
"grad_norm": 3.786229133605957,
"learning_rate": 3.43921192217232e-05,
"loss": 0.571,
"step": 24600
},
{
"ar_loss": 0.4085,
"epoch": 3.490702320577839,
"fm_loss": 0.1606,
"grad_norm": 3.3834242820739746,
"learning_rate": 3.42800375088152e-05,
"loss": 0.5691,
"step": 24700
},
{
"ar_loss": 0.4106,
"epoch": 3.5060703857384357,
"fm_loss": 0.1612,
"grad_norm": 2.6395816802978516,
"learning_rate": 3.4167739041234595e-05,
"loss": 0.5718,
"step": 24800
},
{
"ar_loss": 0.4095,
"epoch": 3.5214384508990317,
"fm_loss": 0.161,
"grad_norm": 3.1765825748443604,
"learning_rate": 3.405522644194682e-05,
"loss": 0.5705,
"step": 24900
},
{
"ar_loss": 0.4098,
"epoch": 3.5368065160596283,
"fm_loss": 0.1619,
"grad_norm": 2.2531938552856445,
"learning_rate": 3.3942502338918795e-05,
"loss": 0.5716,
"step": 25000
},
{
"ar_loss": 0.4074,
"epoch": 3.5521745812202243,
"fm_loss": 0.1624,
"grad_norm": 2.6430165767669678,
"learning_rate": 3.382956936505755e-05,
"loss": 0.5698,
"step": 25100
},
{
"ar_loss": 0.4096,
"epoch": 3.5675426463808204,
"fm_loss": 0.1607,
"grad_norm": 2.1325323581695557,
"learning_rate": 3.371643015814874e-05,
"loss": 0.5703,
"step": 25200
},
{
"ar_loss": 0.4111,
"epoch": 3.582910711541417,
"fm_loss": 0.1614,
"grad_norm": 2.9714601039886475,
"learning_rate": 3.360308736079502e-05,
"loss": 0.5725,
"step": 25300
},
{
"ar_loss": 0.4086,
"epoch": 3.5982787767020135,
"fm_loss": 0.1612,
"grad_norm": 2.1280746459960938,
"learning_rate": 3.348954362035432e-05,
"loss": 0.5698,
"step": 25400
},
{
"ar_loss": 0.4074,
"epoch": 3.6136468418626095,
"fm_loss": 0.1608,
"grad_norm": 8.301246643066406,
"learning_rate": 3.337580158887802e-05,
"loss": 0.5681,
"step": 25500
},
{
"ar_loss": 0.4054,
"epoch": 3.6290149070232056,
"fm_loss": 0.164,
"grad_norm": 2.632568120956421,
"learning_rate": 3.326186392304901e-05,
"loss": 0.5694,
"step": 25600
},
{
"ar_loss": 0.4076,
"epoch": 3.644382972183802,
"fm_loss": 0.1616,
"grad_norm": 4.138895511627197,
"learning_rate": 3.314773328411962e-05,
"loss": 0.5692,
"step": 25700
},
{
"ar_loss": 0.4059,
"epoch": 3.659751037344398,
"fm_loss": 0.1616,
"grad_norm": 2.5377886295318604,
"learning_rate": 3.3033412337849466e-05,
"loss": 0.5675,
"step": 25800
},
{
"ar_loss": 0.4085,
"epoch": 3.6751191025049947,
"fm_loss": 0.1617,
"grad_norm": 2.334913730621338,
"learning_rate": 3.2918903754443195e-05,
"loss": 0.5702,
"step": 25900
},
{
"ar_loss": 0.4064,
"epoch": 3.690487167665591,
"fm_loss": 0.1625,
"grad_norm": 1.9625128507614136,
"learning_rate": 3.2804210208488114e-05,
"loss": 0.5689,
"step": 26000
},
{
"ar_loss": 0.4057,
"epoch": 3.7058552328261873,
"fm_loss": 0.1617,
"grad_norm": 2.2251503467559814,
"learning_rate": 3.268933437889172e-05,
"loss": 0.5674,
"step": 26100
},
{
"ar_loss": 0.4066,
"epoch": 3.7212232979867834,
"fm_loss": 0.1617,
"grad_norm": 3.2878170013427734,
"learning_rate": 3.2574278948819105e-05,
"loss": 0.5683,
"step": 26200
},
{
"ar_loss": 0.4097,
"epoch": 3.7365913631473795,
"fm_loss": 0.1618,
"grad_norm": 3.281667947769165,
"learning_rate": 3.2459046605630334e-05,
"loss": 0.5715,
"step": 26300
},
{
"ar_loss": 0.406,
"epoch": 3.751959428307976,
"fm_loss": 0.1607,
"grad_norm": 2.7066891193389893,
"learning_rate": 3.234364004081763e-05,
"loss": 0.5667,
"step": 26400
},
{
"ar_loss": 0.4041,
"epoch": 3.7673274934685725,
"fm_loss": 0.1614,
"grad_norm": 4.11818790435791,
"learning_rate": 3.222806194994253e-05,
"loss": 0.5655,
"step": 26500
},
{
"ar_loss": 0.4039,
"epoch": 3.7826955586291686,
"fm_loss": 0.1611,
"grad_norm": 2.0963382720947266,
"learning_rate": 3.211231503257292e-05,
"loss": 0.565,
"step": 26600
},
{
"ar_loss": 0.4039,
"epoch": 3.7980636237897647,
"fm_loss": 0.1612,
"grad_norm": 8.769935607910156,
"learning_rate": 3.199640199221998e-05,
"loss": 0.5651,
"step": 26700
},
{
"ar_loss": 0.407,
"epoch": 3.813431688950361,
"fm_loss": 0.1614,
"grad_norm": 4.367818832397461,
"learning_rate": 3.188032553627505e-05,
"loss": 0.5684,
"step": 26800
},
{
"ar_loss": 0.4025,
"epoch": 3.8287997541109573,
"fm_loss": 0.1612,
"grad_norm": 3.1772165298461914,
"learning_rate": 3.1764088375946355e-05,
"loss": 0.5637,
"step": 26900
},
{
"ar_loss": 0.4051,
"epoch": 3.844167819271554,
"fm_loss": 0.1606,
"grad_norm": 2.3004674911499023,
"learning_rate": 3.1647693226195764e-05,
"loss": 0.5657,
"step": 27000
},
{
"ar_loss": 0.4023,
"epoch": 3.85953588443215,
"fm_loss": 0.1609,
"grad_norm": 2.438415050506592,
"learning_rate": 3.1531142805675244e-05,
"loss": 0.5633,
"step": 27100
},
{
"ar_loss": 0.405,
"epoch": 3.8749039495927464,
"fm_loss": 0.1615,
"grad_norm": 2.6293303966522217,
"learning_rate": 3.141443983666349e-05,
"loss": 0.5666,
"step": 27200
},
{
"ar_loss": 0.4022,
"epoch": 3.8902720147533425,
"fm_loss": 0.1609,
"grad_norm": 3.9701802730560303,
"learning_rate": 3.1297587045002265e-05,
"loss": 0.5631,
"step": 27300
},
{
"ar_loss": 0.4008,
"epoch": 3.9056400799139386,
"fm_loss": 0.1615,
"grad_norm": 1.8459994792938232,
"learning_rate": 3.118058716003277e-05,
"loss": 0.5623,
"step": 27400
},
{
"ar_loss": 0.4058,
"epoch": 3.921008145074535,
"fm_loss": 0.1612,
"grad_norm": 10.706082344055176,
"learning_rate": 3.106344291453185e-05,
"loss": 0.567,
"step": 27500
},
{
"ar_loss": 0.4022,
"epoch": 3.9363762102351316,
"fm_loss": 0.1609,
"grad_norm": 2.411083221435547,
"learning_rate": 3.09461570446482e-05,
"loss": 0.5632,
"step": 27600
},
{
"ar_loss": 0.4021,
"epoch": 3.9517442753957277,
"fm_loss": 0.1609,
"grad_norm": 1.9293774366378784,
"learning_rate": 3.082873228983847e-05,
"loss": 0.563,
"step": 27700
},
{
"ar_loss": 0.4021,
"epoch": 3.967112340556324,
"fm_loss": 0.1607,
"grad_norm": 2.12009334564209,
"learning_rate": 3.071117139280325e-05,
"loss": 0.5628,
"step": 27800
},
{
"ar_loss": 0.4052,
"epoch": 3.9824804057169203,
"fm_loss": 0.16,
"grad_norm": 1.866605281829834,
"learning_rate": 3.059347709942299e-05,
"loss": 0.5652,
"step": 27900
},
{
"ar_loss": 0.404,
"epoch": 3.9978484708775164,
"fm_loss": 0.1614,
"grad_norm": 3.483856439590454,
"learning_rate": 3.0475652158693912e-05,
"loss": 0.5653,
"step": 28000
},
{
"ar_loss": 0.3897,
"epoch": 4.0132165360381125,
"fm_loss": 0.1654,
"grad_norm": 6.412163734436035,
"learning_rate": 3.0357699322663784e-05,
"loss": 0.5552,
"step": 28100
},
{
"ar_loss": 0.3912,
"epoch": 4.028584601198709,
"fm_loss": 0.1622,
"grad_norm": 2.0246286392211914,
"learning_rate": 3.023962134636763e-05,
"loss": 0.5535,
"step": 28200
},
{
"ar_loss": 0.3892,
"epoch": 4.0439526663593055,
"fm_loss": 0.1619,
"grad_norm": 1.9690488576889038,
"learning_rate": 3.0121420987763393e-05,
"loss": 0.5511,
"step": 28300
},
{
"ar_loss": 0.3891,
"epoch": 4.059320731519902,
"fm_loss": 0.161,
"grad_norm": 2.742201805114746,
"learning_rate": 3.0003101007667485e-05,
"loss": 0.55,
"step": 28400
},
{
"ar_loss": 0.3889,
"epoch": 4.074688796680498,
"fm_loss": 0.1608,
"grad_norm": 15.931248664855957,
"learning_rate": 2.9884664169690356e-05,
"loss": 0.5496,
"step": 28500
},
{
"ar_loss": 0.3909,
"epoch": 4.090056861841094,
"fm_loss": 0.16,
"grad_norm": 2.156740665435791,
"learning_rate": 2.976611324017191e-05,
"loss": 0.5509,
"step": 28600
},
{
"ar_loss": 0.388,
"epoch": 4.105424927001691,
"fm_loss": 0.1659,
"grad_norm": 5.583569049835205,
"learning_rate": 2.9647450988116893e-05,
"loss": 0.5539,
"step": 28700
},
{
"ar_loss": 0.3909,
"epoch": 4.120792992162286,
"fm_loss": 0.1597,
"grad_norm": 2.5985569953918457,
"learning_rate": 2.9528680185130214e-05,
"loss": 0.5505,
"step": 28800
},
{
"ar_loss": 0.3892,
"epoch": 4.136161057322883,
"fm_loss": 0.1596,
"grad_norm": 3.106172800064087,
"learning_rate": 2.9409803605352237e-05,
"loss": 0.5488,
"step": 28900
},
{
"ar_loss": 0.3863,
"epoch": 4.151529122483479,
"fm_loss": 0.1611,
"grad_norm": 2.211129665374756,
"learning_rate": 2.929082402539395e-05,
"loss": 0.5473,
"step": 29000
},
{
"ar_loss": 0.39,
"epoch": 4.166897187644076,
"fm_loss": 0.1602,
"grad_norm": 1.949877142906189,
"learning_rate": 2.9171744224272113e-05,
"loss": 0.5502,
"step": 29100
},
{
"ar_loss": 0.387,
"epoch": 4.1822652528046715,
"fm_loss": 0.1646,
"grad_norm": 8.61090087890625,
"learning_rate": 2.9052566983344388e-05,
"loss": 0.5516,
"step": 29200
},
{
"ar_loss": 0.391,
"epoch": 4.197633317965268,
"fm_loss": 0.1596,
"grad_norm": 2.0502259731292725,
"learning_rate": 2.893329508624433e-05,
"loss": 0.5506,
"step": 29300
},
{
"ar_loss": 0.3897,
"epoch": 4.213001383125865,
"fm_loss": 0.1603,
"grad_norm": 2.44155216217041,
"learning_rate": 2.8813931318816395e-05,
"loss": 0.55,
"step": 29400
},
{
"ar_loss": 0.387,
"epoch": 4.228369448286461,
"fm_loss": 0.1616,
"grad_norm": 2.004561185836792,
"learning_rate": 2.869447846905085e-05,
"loss": 0.5487,
"step": 29500
},
{
"ar_loss": 0.3875,
"epoch": 4.243737513447057,
"fm_loss": 0.1598,
"grad_norm": 5.270979881286621,
"learning_rate": 2.8574939327018685e-05,
"loss": 0.5473,
"step": 29600
},
{
"ar_loss": 0.3873,
"epoch": 4.259105578607653,
"fm_loss": 0.1605,
"grad_norm": 6.422144412994385,
"learning_rate": 2.8455316684806404e-05,
"loss": 0.5478,
"step": 29700
},
{
"ar_loss": 0.3856,
"epoch": 4.27447364376825,
"fm_loss": 0.1609,
"grad_norm": 9.331077575683594,
"learning_rate": 2.833561333645085e-05,
"loss": 0.5465,
"step": 29800
},
{
"ar_loss": 0.3887,
"epoch": 4.289841708928845,
"fm_loss": 0.1603,
"grad_norm": 3.2531440258026123,
"learning_rate": 2.8215832077873928e-05,
"loss": 0.549,
"step": 29900
},
{
"ar_loss": 0.3879,
"epoch": 4.305209774089442,
"fm_loss": 0.1616,
"grad_norm": 4.434958457946777,
"learning_rate": 2.8095975706817283e-05,
"loss": 0.5495,
"step": 30000
},
{
"ar_loss": 0.3873,
"epoch": 4.015372790161415,
"fm_loss": 0.16,
"grad_norm": 2.223679542541504,
"learning_rate": 2.7964958224364322e-05,
"loss": 0.5474,
"step": 30100
},
{
"ar_loss": 0.3861,
"epoch": 4.0307455803228285,
"fm_loss": 0.1608,
"grad_norm": 2.2263758182525635,
"learning_rate": 2.7844916800976393e-05,
"loss": 0.5468,
"step": 30200
},
{
"ar_loss": 0.385,
"epoch": 4.046118370484243,
"fm_loss": 0.1591,
"grad_norm": 3.2188937664031982,
"learning_rate": 2.772480888770234e-05,
"loss": 0.5442,
"step": 30300
},
{
"ar_loss": 0.3838,
"epoch": 4.061491160645657,
"fm_loss": 0.1605,
"grad_norm": 2.1031410694122314,
"learning_rate": 2.7604637291640594e-05,
"loss": 0.5443,
"step": 30400
},
{
"ar_loss": 0.3874,
"epoch": 4.076863950807072,
"fm_loss": 0.1604,
"grad_norm": 3.646202564239502,
"learning_rate": 2.748440482137793e-05,
"loss": 0.5478,
"step": 30500
},
{
"ar_loss": 0.3888,
"epoch": 4.092236740968485,
"fm_loss": 0.16,
"grad_norm": 3.0315663814544678,
"learning_rate": 2.7364114286923865e-05,
"loss": 0.5488,
"step": 30600
},
{
"ar_loss": 0.386,
"epoch": 4.1076095311299,
"fm_loss": 0.1597,
"grad_norm": 2.8235671520233154,
"learning_rate": 2.7243768499644946e-05,
"loss": 0.5457,
"step": 30700
},
{
"ar_loss": 0.3889,
"epoch": 4.122982321291314,
"fm_loss": 0.161,
"grad_norm": 2.2409422397613525,
"learning_rate": 2.7123370272199055e-05,
"loss": 0.55,
"step": 30800
},
{
"ar_loss": 0.3876,
"epoch": 4.138355111452729,
"fm_loss": 0.1612,
"grad_norm": 2.19675874710083,
"learning_rate": 2.700292241846971e-05,
"loss": 0.5488,
"step": 30900
},
{
"ar_loss": 0.3871,
"epoch": 4.153727901614143,
"fm_loss": 0.1601,
"grad_norm": 2.340182304382324,
"learning_rate": 2.6882427753500245e-05,
"loss": 0.5471,
"step": 31000
},
{
"ar_loss": 0.3879,
"epoch": 4.169100691775557,
"fm_loss": 0.16,
"grad_norm": 3.1240203380584717,
"learning_rate": 2.676188909342801e-05,
"loss": 0.5479,
"step": 31100
},
{
"ar_loss": 0.3832,
"epoch": 4.184473481936972,
"fm_loss": 0.1603,
"grad_norm": 2.1297569274902344,
"learning_rate": 2.664130925541865e-05,
"loss": 0.5436,
"step": 31200
},
{
"ar_loss": 0.3889,
"epoch": 4.199846272098386,
"fm_loss": 0.1599,
"grad_norm": 3.824352264404297,
"learning_rate": 2.6520691057600155e-05,
"loss": 0.5488,
"step": 31300
},
{
"ar_loss": 0.388,
"epoch": 4.2152190622598,
"fm_loss": 0.1605,
"grad_norm": 1.8939049243927002,
"learning_rate": 2.6400037318997046e-05,
"loss": 0.5485,
"step": 31400
},
{
"ar_loss": 0.3886,
"epoch": 4.230591852421214,
"fm_loss": 0.1609,
"grad_norm": 2.476659059524536,
"learning_rate": 2.6279350859464502e-05,
"loss": 0.5495,
"step": 31500
},
{
"ar_loss": 0.3888,
"epoch": 4.245964642582629,
"fm_loss": 0.1602,
"grad_norm": 3.117105007171631,
"learning_rate": 2.6158634499622425e-05,
"loss": 0.549,
"step": 31600
},
{
"ar_loss": 0.386,
"epoch": 4.261337432744043,
"fm_loss": 0.1603,
"grad_norm": 2.931809663772583,
"learning_rate": 2.6037891060789514e-05,
"loss": 0.5464,
"step": 31700
},
{
"ar_loss": 0.382,
"epoch": 4.276710222905457,
"fm_loss": 0.1591,
"grad_norm": 2.1063570976257324,
"learning_rate": 2.5917123364917378e-05,
"loss": 0.5411,
"step": 31800
},
{
"ar_loss": 0.3863,
"epoch": 4.292083013066872,
"fm_loss": 0.1601,
"grad_norm": 4.796857833862305,
"learning_rate": 2.5796334234524533e-05,
"loss": 0.5463,
"step": 31900
},
{
"ar_loss": 0.3849,
"epoch": 4.307455803228286,
"fm_loss": 0.1612,
"grad_norm": 2.840190887451172,
"learning_rate": 2.567552649263044e-05,
"loss": 0.5461,
"step": 32000
},
{
"ar_loss": 0.3876,
"epoch": 4.3228285933897,
"fm_loss": 0.1686,
"grad_norm": 2.64380145072937,
"learning_rate": 2.5554702962689563e-05,
"loss": 0.5562,
"step": 32100
},
{
"ar_loss": 0.3855,
"epoch": 4.338201383551114,
"fm_loss": 0.1599,
"grad_norm": 4.3930535316467285,
"learning_rate": 2.5433866468525342e-05,
"loss": 0.5453,
"step": 32200
},
{
"ar_loss": 0.3825,
"epoch": 4.353574173712529,
"fm_loss": 0.1587,
"grad_norm": 2.1165313720703125,
"learning_rate": 2.531301983426419e-05,
"loss": 0.5412,
"step": 32300
},
{
"ar_loss": 0.3849,
"epoch": 4.3689469638739435,
"fm_loss": 0.1601,
"grad_norm": 3.832287073135376,
"learning_rate": 2.519216588426955e-05,
"loss": 0.5449,
"step": 32400
},
{
"ar_loss": 0.3872,
"epoch": 4.384319754035357,
"fm_loss": 0.16,
"grad_norm": 2.8910598754882812,
"learning_rate": 2.507130744307581e-05,
"loss": 0.5472,
"step": 32500
},
{
"ar_loss": 0.3847,
"epoch": 4.399692544196772,
"fm_loss": 0.1603,
"grad_norm": 2.692793607711792,
"learning_rate": 2.4950447335322335e-05,
"loss": 0.545,
"step": 32600
},
{
"ar_loss": 0.3831,
"epoch": 4.415065334358186,
"fm_loss": 0.1595,
"grad_norm": 2.35683536529541,
"learning_rate": 2.482958838568746e-05,
"loss": 0.5426,
"step": 32700
},
{
"ar_loss": 0.3861,
"epoch": 4.4304381245196005,
"fm_loss": 0.159,
"grad_norm": 1.9129565954208374,
"learning_rate": 2.4708733418822427e-05,
"loss": 0.5451,
"step": 32800
},
{
"ar_loss": 0.3868,
"epoch": 4.445810914681014,
"fm_loss": 0.1599,
"grad_norm": 4.045548915863037,
"learning_rate": 2.4587885259285396e-05,
"loss": 0.5466,
"step": 32900
},
{
"ar_loss": 0.3834,
"epoch": 4.461183704842429,
"fm_loss": 0.1601,
"grad_norm": 2.0863168239593506,
"learning_rate": 2.446704673147544e-05,
"loss": 0.5436,
"step": 33000
},
{
"ar_loss": 0.3837,
"epoch": 4.476556495003843,
"fm_loss": 0.1596,
"grad_norm": 2.455124855041504,
"learning_rate": 2.4346220659566513e-05,
"loss": 0.5433,
"step": 33100
},
{
"ar_loss": 0.3841,
"epoch": 4.4919292851652575,
"fm_loss": 0.1593,
"grad_norm": 2.560342311859131,
"learning_rate": 2.4225409867441483e-05,
"loss": 0.5434,
"step": 33200
},
{
"ar_loss": 0.3813,
"epoch": 4.507302075326672,
"fm_loss": 0.1589,
"grad_norm": 13.45487117767334,
"learning_rate": 2.4104617178626075e-05,
"loss": 0.5402,
"step": 33300
},
{
"ar_loss": 0.3854,
"epoch": 4.522674865488086,
"fm_loss": 0.1606,
"grad_norm": 1.9280191659927368,
"learning_rate": 2.3983845416222943e-05,
"loss": 0.546,
"step": 33400
},
{
"ar_loss": 0.3861,
"epoch": 4.538047655649501,
"fm_loss": 0.1599,
"grad_norm": 4.483201503753662,
"learning_rate": 2.386309740284562e-05,
"loss": 0.5461,
"step": 33500
},
{
"ar_loss": 0.3837,
"epoch": 4.553420445810914,
"fm_loss": 0.1597,
"grad_norm": 2.269436836242676,
"learning_rate": 2.3742375960552628e-05,
"loss": 0.5434,
"step": 33600
},
{
"ar_loss": 0.3845,
"epoch": 4.568793235972329,
"fm_loss": 0.1595,
"grad_norm": 2.5172126293182373,
"learning_rate": 2.3621683910781458e-05,
"loss": 0.544,
"step": 33700
},
{
"ar_loss": 0.3859,
"epoch": 4.584166026133743,
"fm_loss": 0.1586,
"grad_norm": 29.834903717041016,
"learning_rate": 2.3501024074282665e-05,
"loss": 0.5445,
"step": 33800
},
{
"ar_loss": 0.3864,
"epoch": 4.599538816295158,
"fm_loss": 0.16,
"grad_norm": 2.567058563232422,
"learning_rate": 2.3380399271053953e-05,
"loss": 0.5464,
"step": 33900
},
{
"ar_loss": 0.3836,
"epoch": 4.614911606456571,
"fm_loss": 0.1598,
"grad_norm": 2.2282016277313232,
"learning_rate": 2.3259812320274206e-05,
"loss": 0.5434,
"step": 34000
},
{
"ar_loss": 0.3875,
"epoch": 4.630284396617986,
"fm_loss": 0.1595,
"grad_norm": 2.367128849029541,
"learning_rate": 2.313926604023767e-05,
"loss": 0.5469,
"step": 34100
},
{
"ar_loss": 0.3826,
"epoch": 4.645657186779401,
"fm_loss": 0.1587,
"grad_norm": 2.4427452087402344,
"learning_rate": 2.3018763248288043e-05,
"loss": 0.5413,
"step": 34200
},
{
"ar_loss": 0.3856,
"epoch": 4.661029976940815,
"fm_loss": 0.1597,
"grad_norm": 3.8236427307128906,
"learning_rate": 2.289830676075265e-05,
"loss": 0.5453,
"step": 34300
},
{
"ar_loss": 0.3857,
"epoch": 4.676402767102229,
"fm_loss": 0.16,
"grad_norm": 3.789658546447754,
"learning_rate": 2.2777899392876596e-05,
"loss": 0.5457,
"step": 34400
},
{
"ar_loss": 0.3843,
"epoch": 4.691775557263643,
"fm_loss": 0.1598,
"grad_norm": 7.402520656585693,
"learning_rate": 2.265754395875703e-05,
"loss": 0.5441,
"step": 34500
},
{
"ar_loss": 0.3828,
"epoch": 4.707148347425058,
"fm_loss": 0.1606,
"grad_norm": 2.951808452606201,
"learning_rate": 2.2537243271277286e-05,
"loss": 0.5434,
"step": 34600
},
{
"ar_loss": 0.3828,
"epoch": 4.722521137586472,
"fm_loss": 0.1602,
"grad_norm": 2.665632724761963,
"learning_rate": 2.241700014204121e-05,
"loss": 0.543,
"step": 34700
},
{
"ar_loss": 0.3818,
"epoch": 4.737893927747886,
"fm_loss": 0.1606,
"grad_norm": 2.390002727508545,
"learning_rate": 2.2296817381307425e-05,
"loss": 0.5423,
"step": 34800
},
{
"ar_loss": 0.3864,
"epoch": 4.753266717909301,
"fm_loss": 0.1599,
"grad_norm": 5.915409564971924,
"learning_rate": 2.2176697797923653e-05,
"loss": 0.5463,
"step": 34900
},
{
"ar_loss": 0.3823,
"epoch": 4.768639508070715,
"fm_loss": 0.165,
"grad_norm": 3.208481550216675,
"learning_rate": 2.205664419926106e-05,
"loss": 0.5473,
"step": 35000
},
{
"ar_loss": 0.384,
"epoch": 4.784012298232129,
"fm_loss": 0.1604,
"grad_norm": 2.6016440391540527,
"learning_rate": 2.1936659391148682e-05,
"loss": 0.5444,
"step": 35100
},
{
"ar_loss": 0.3839,
"epoch": 4.799385088393543,
"fm_loss": 0.16,
"grad_norm": 7.024256229400635,
"learning_rate": 2.1816746177807777e-05,
"loss": 0.5438,
"step": 35200
},
{
"ar_loss": 0.3835,
"epoch": 4.814757878554958,
"fm_loss": 0.1604,
"grad_norm": 2.983032464981079,
"learning_rate": 2.169690736178636e-05,
"loss": 0.5439,
"step": 35300
},
{
"ar_loss": 0.3857,
"epoch": 4.830130668716372,
"fm_loss": 0.1596,
"grad_norm": 3.4045064449310303,
"learning_rate": 2.1577145743893652e-05,
"loss": 0.5453,
"step": 35400
},
{
"ar_loss": 0.3799,
"epoch": 4.845503458877786,
"fm_loss": 0.1606,
"grad_norm": 3.3932929039001465,
"learning_rate": 2.1457464123134654e-05,
"loss": 0.5406,
"step": 35500
},
{
"ar_loss": 0.3815,
"epoch": 4.860876249039201,
"fm_loss": 0.1618,
"grad_norm": 5.131358623504639,
"learning_rate": 2.1337865296644693e-05,
"loss": 0.5433,
"step": 35600
},
{
"ar_loss": 0.3839,
"epoch": 4.876249039200615,
"fm_loss": 0.1597,
"grad_norm": 2.781935453414917,
"learning_rate": 2.1218352059624125e-05,
"loss": 0.5436,
"step": 35700
},
{
"ar_loss": 0.3836,
"epoch": 4.8916218293620295,
"fm_loss": 0.1593,
"grad_norm": 4.221926689147949,
"learning_rate": 2.1098927205272888e-05,
"loss": 0.5429,
"step": 35800
},
{
"ar_loss": 0.3839,
"epoch": 4.906994619523443,
"fm_loss": 0.1595,
"grad_norm": 3.9555509090423584,
"learning_rate": 2.0979593524725326e-05,
"loss": 0.5434,
"step": 35900
},
{
"ar_loss": 0.3815,
"epoch": 4.922367409684858,
"fm_loss": 0.1597,
"grad_norm": 2.448594331741333,
"learning_rate": 2.0860353806984917e-05,
"loss": 0.5412,
"step": 36000
},
{
"ar_loss": 0.3813,
"epoch": 4.937740199846272,
"fm_loss": 0.1604,
"grad_norm": 2.342075824737549,
"learning_rate": 2.074121083885907e-05,
"loss": 0.5417,
"step": 36100
},
{
"ar_loss": 0.3825,
"epoch": 4.9531129900076865,
"fm_loss": 0.1601,
"grad_norm": 7.287113666534424,
"learning_rate": 2.0622167404894034e-05,
"loss": 0.5426,
"step": 36200
},
{
"ar_loss": 0.3835,
"epoch": 4.9684857801691,
"fm_loss": 0.1583,
"grad_norm": 15.53018569946289,
"learning_rate": 2.0503226287309786e-05,
"loss": 0.5418,
"step": 36300
},
{
"ar_loss": 0.3832,
"epoch": 4.983858570330515,
"fm_loss": 0.1783,
"grad_norm": 36.44465637207031,
"learning_rate": 2.0384390265935027e-05,
"loss": 0.5614,
"step": 36400
},
{
"ar_loss": 0.382,
"epoch": 4.99923136049193,
"fm_loss": 0.1595,
"grad_norm": 4.1146745681762695,
"learning_rate": 2.02656621181422e-05,
"loss": 0.5415,
"step": 36500
},
{
"ar_loss": 0.3779,
"epoch": 5.014604150653343,
"fm_loss": 0.1594,
"grad_norm": 2.2293853759765625,
"learning_rate": 2.0147044618782585e-05,
"loss": 0.5373,
"step": 36600
},
{
"ar_loss": 0.3786,
"epoch": 5.029976940814758,
"fm_loss": 0.1599,
"grad_norm": 3.3400254249572754,
"learning_rate": 2.0028540540121444e-05,
"loss": 0.5386,
"step": 36700
},
{
"ar_loss": 0.3761,
"epoch": 5.045349730976172,
"fm_loss": 0.1594,
"grad_norm": 2.30407452583313,
"learning_rate": 1.9910152651773235e-05,
"loss": 0.5355,
"step": 36800
},
{
"ar_loss": 0.3785,
"epoch": 5.060722521137587,
"fm_loss": 0.1592,
"grad_norm": 2.3268890380859375,
"learning_rate": 1.9791883720636864e-05,
"loss": 0.5376,
"step": 36900
},
{
"ar_loss": 0.3771,
"epoch": 5.076095311299,
"fm_loss": 0.1583,
"grad_norm": 4.636923789978027,
"learning_rate": 1.967373651083106e-05,
"loss": 0.5354,
"step": 37000
},
{
"ar_loss": 0.3741,
"epoch": 5.091468101460415,
"fm_loss": 0.1595,
"grad_norm": 8.473295211791992,
"learning_rate": 1.9555713783629714e-05,
"loss": 0.5336,
"step": 37100
},
{
"ar_loss": 0.3763,
"epoch": 5.10684089162183,
"fm_loss": 0.1588,
"grad_norm": 2.227837562561035,
"learning_rate": 1.9437818297397403e-05,
"loss": 0.5351,
"step": 37200
},
{
"ar_loss": 0.3778,
"epoch": 5.1222136817832435,
"fm_loss": 0.1597,
"grad_norm": 2.5238685607910156,
"learning_rate": 1.9320052807524874e-05,
"loss": 0.5376,
"step": 37300
},
{
"ar_loss": 0.3799,
"epoch": 5.137586471944658,
"fm_loss": 0.16,
"grad_norm": 3.3944637775421143,
"learning_rate": 1.9202420066364678e-05,
"loss": 0.5399,
"step": 37400
},
{
"ar_loss": 0.3767,
"epoch": 5.152959262106072,
"fm_loss": 0.1584,
"grad_norm": 16.466928482055664,
"learning_rate": 1.908492282316683e-05,
"loss": 0.5351,
"step": 37500
},
{
"ar_loss": 0.3743,
"epoch": 5.168332052267487,
"fm_loss": 0.1591,
"grad_norm": 2.3075056076049805,
"learning_rate": 1.8967563824014563e-05,
"loss": 0.5334,
"step": 37600
},
{
"ar_loss": 0.3821,
"epoch": 5.1837048424289005,
"fm_loss": 0.1595,
"grad_norm": 2.2367005348205566,
"learning_rate": 1.8850345811760152e-05,
"loss": 0.5415,
"step": 37700
},
{
"ar_loss": 0.3746,
"epoch": 5.199077632590315,
"fm_loss": 0.1596,
"grad_norm": 1.8881235122680664,
"learning_rate": 1.873327152596077e-05,
"loss": 0.5342,
"step": 37800
},
{
"ar_loss": 0.3776,
"epoch": 5.21445042275173,
"fm_loss": 0.1589,
"grad_norm": 2.585137128829956,
"learning_rate": 1.861634370281453e-05,
"loss": 0.5365,
"step": 37900
},
{
"ar_loss": 0.3767,
"epoch": 5.229823212913144,
"fm_loss": 0.1594,
"grad_norm": 3.3256664276123047,
"learning_rate": 1.849956507509647e-05,
"loss": 0.5361,
"step": 38000
},
{
"ar_loss": 0.3767,
"epoch": 5.245196003074558,
"fm_loss": 0.1601,
"grad_norm": 2.189387559890747,
"learning_rate": 1.838293837209472e-05,
"loss": 0.5368,
"step": 38100
},
{
"ar_loss": 0.3765,
"epoch": 5.260568793235972,
"fm_loss": 0.1638,
"grad_norm": 2.6017394065856934,
"learning_rate": 1.8266466319546712e-05,
"loss": 0.5403,
"step": 38200
},
{
"ar_loss": 0.3755,
"epoch": 5.275941583397387,
"fm_loss": 0.1598,
"grad_norm": 3.207014322280884,
"learning_rate": 1.8150151639575466e-05,
"loss": 0.5354,
"step": 38300
},
{
"ar_loss": 0.3772,
"epoch": 5.291314373558801,
"fm_loss": 0.1589,
"grad_norm": 3.0959692001342773,
"learning_rate": 1.8033997050625966e-05,
"loss": 0.536,
"step": 38400
},
{
"ar_loss": 0.3742,
"epoch": 5.306687163720215,
"fm_loss": 0.1587,
"grad_norm": 2.948103666305542,
"learning_rate": 1.791800526740165e-05,
"loss": 0.5329,
"step": 38500
},
{
"ar_loss": 0.3768,
"epoch": 5.322059953881629,
"fm_loss": 0.1633,
"grad_norm": 3.3506641387939453,
"learning_rate": 1.7802179000800927e-05,
"loss": 0.5401,
"step": 38600
},
{
"ar_loss": 0.3748,
"epoch": 5.337432744043044,
"fm_loss": 0.1593,
"grad_norm": 1.7779144048690796,
"learning_rate": 1.768652095785385e-05,
"loss": 0.5342,
"step": 38700
},
{
"ar_loss": 0.3771,
"epoch": 5.3528055342044585,
"fm_loss": 0.1593,
"grad_norm": 3.081190347671509,
"learning_rate": 1.7571033841658844e-05,
"loss": 0.5365,
"step": 38800
},
{
"ar_loss": 0.3769,
"epoch": 5.368178324365872,
"fm_loss": 0.159,
"grad_norm": 2.642840623855591,
"learning_rate": 1.7455720351319516e-05,
"loss": 0.5359,
"step": 38900
},
{
"ar_loss": 0.3759,
"epoch": 5.383551114527287,
"fm_loss": 0.1596,
"grad_norm": 3.235140323638916,
"learning_rate": 1.734058318188158e-05,
"loss": 0.5355,
"step": 39000
},
{
"ar_loss": 0.3728,
"epoch": 5.398923904688701,
"fm_loss": 0.1596,
"grad_norm": 2.3143417835235596,
"learning_rate": 1.7225625024269877e-05,
"loss": 0.5324,
"step": 39100
},
{
"ar_loss": 0.3762,
"epoch": 5.414296694850115,
"fm_loss": 0.1592,
"grad_norm": 2.0119452476501465,
"learning_rate": 1.711084856522548e-05,
"loss": 0.5354,
"step": 39200
},
{
"ar_loss": 0.3725,
"epoch": 5.429669485011529,
"fm_loss": 0.1596,
"grad_norm": 2.9905428886413574,
"learning_rate": 1.6996256487242894e-05,
"loss": 0.5321,
"step": 39300
},
{
"ar_loss": 0.3746,
"epoch": 5.445042275172944,
"fm_loss": 0.1635,
"grad_norm": 2.617199659347534,
"learning_rate": 1.6881851468507358e-05,
"loss": 0.5382,
"step": 39400
},
{
"ar_loss": 0.3746,
"epoch": 5.460415065334358,
"fm_loss": 0.1599,
"grad_norm": 2.1979682445526123,
"learning_rate": 1.6767636182832292e-05,
"loss": 0.5345,
"step": 39500
},
{
"ar_loss": 0.3768,
"epoch": 5.475787855495772,
"fm_loss": 0.1586,
"grad_norm": 3.3658335208892822,
"learning_rate": 1.6653613299596748e-05,
"loss": 0.5354,
"step": 39600
},
{
"ar_loss": 0.3753,
"epoch": 5.491160645657187,
"fm_loss": 0.1583,
"grad_norm": 3.347707748413086,
"learning_rate": 1.6539785483683063e-05,
"loss": 0.5336,
"step": 39700
},
{
"ar_loss": 0.3786,
"epoch": 5.506533435818601,
"fm_loss": 0.1625,
"grad_norm": 2.0319979190826416,
"learning_rate": 1.6426155395414555e-05,
"loss": 0.5411,
"step": 39800
},
{
"ar_loss": 0.3765,
"epoch": 5.521906225980016,
"fm_loss": 0.16,
"grad_norm": 2.5716593265533447,
"learning_rate": 1.631272569049336e-05,
"loss": 0.5365,
"step": 39900
},
{
"ar_loss": 0.3767,
"epoch": 5.537279016141429,
"fm_loss": 0.1596,
"grad_norm": 2.94095516204834,
"learning_rate": 1.6199499019938363e-05,
"loss": 0.5364,
"step": 40000
},
{
"ar_loss": 0.3755,
"epoch": 6.015372790161415,
"fm_loss": 0.1588,
"grad_norm": 2.66264271736145,
"learning_rate": 1.6086478030023248e-05,
"loss": 0.5343,
"step": 40100
},
{
"ar_loss": 0.3771,
"epoch": 6.0307455803228285,
"fm_loss": 0.1577,
"grad_norm": 3.340590476989746,
"learning_rate": 1.597366536221462e-05,
"loss": 0.5349,
"step": 40200
},
{
"ar_loss": 0.3748,
"epoch": 6.046118370484243,
"fm_loss": 0.1597,
"grad_norm": 2.351405620574951,
"learning_rate": 1.5861063653110292e-05,
"loss": 0.5345,
"step": 40300
},
{
"ar_loss": 0.3747,
"epoch": 6.061491160645657,
"fm_loss": 0.1608,
"grad_norm": 3.0236189365386963,
"learning_rate": 1.5748675534377683e-05,
"loss": 0.5355,
"step": 40400
},
{
"ar_loss": 0.3763,
"epoch": 6.076863950807072,
"fm_loss": 0.1588,
"grad_norm": 2.360391139984131,
"learning_rate": 1.563650363269227e-05,
"loss": 0.5351,
"step": 40500
},
{
"ar_loss": 0.377,
"epoch": 6.092236740968485,
"fm_loss": 0.1611,
"grad_norm": 2.1837239265441895,
"learning_rate": 1.5524550569676224e-05,
"loss": 0.5381,
"step": 40600
},
{
"ar_loss": 0.376,
"epoch": 6.1076095311299,
"fm_loss": 0.1585,
"grad_norm": 3.4195451736450195,
"learning_rate": 1.541281896183715e-05,
"loss": 0.5346,
"step": 40700
},
{
"ar_loss": 0.3785,
"epoch": 6.122982321291314,
"fm_loss": 0.1592,
"grad_norm": 3.0176382064819336,
"learning_rate": 1.5301311420506897e-05,
"loss": 0.5377,
"step": 40800
},
{
"ar_loss": 0.3735,
"epoch": 6.138355111452729,
"fm_loss": 0.1588,
"grad_norm": 3.1062843799591064,
"learning_rate": 1.5190030551780564e-05,
"loss": 0.5323,
"step": 40900
},
{
"ar_loss": 0.374,
"epoch": 6.153727901614143,
"fm_loss": 0.1592,
"grad_norm": 2.2580771446228027,
"learning_rate": 1.5078978956455581e-05,
"loss": 0.5332,
"step": 41000
},
{
"ar_loss": 0.3748,
"epoch": 6.169100691775557,
"fm_loss": 0.1584,
"grad_norm": 3.3398499488830566,
"learning_rate": 1.4968159229970914e-05,
"loss": 0.5333,
"step": 41100
},
{
"ar_loss": 0.377,
"epoch": 6.184473481936972,
"fm_loss": 0.1581,
"grad_norm": 4.929275989532471,
"learning_rate": 1.4857573962346411e-05,
"loss": 0.5351,
"step": 41200
},
{
"ar_loss": 0.3762,
"epoch": 6.199846272098386,
"fm_loss": 0.16,
"grad_norm": 1.9189740419387817,
"learning_rate": 1.4747225738122278e-05,
"loss": 0.5362,
"step": 41300
},
{
"ar_loss": 0.3733,
"epoch": 6.2152190622598,
"fm_loss": 0.1599,
"grad_norm": 2.381868600845337,
"learning_rate": 1.4637117136298673e-05,
"loss": 0.5333,
"step": 41400
},
{
"ar_loss": 0.3741,
"epoch": 6.230591852421214,
"fm_loss": 0.1591,
"grad_norm": 4.60919713973999,
"learning_rate": 1.452725073027541e-05,
"loss": 0.5332,
"step": 41500
},
{
"ar_loss": 0.3768,
"epoch": 6.245964642582629,
"fm_loss": 0.1579,
"grad_norm": 8.782063484191895,
"learning_rate": 1.4417629087791868e-05,
"loss": 0.5347,
"step": 41600
},
{
"ar_loss": 0.3755,
"epoch": 6.261337432744043,
"fm_loss": 0.1589,
"grad_norm": 3.957836627960205,
"learning_rate": 1.4308254770866886e-05,
"loss": 0.5344,
"step": 41700
},
{
"ar_loss": 0.3779,
"epoch": 6.276710222905457,
"fm_loss": 0.1578,
"grad_norm": 4.637214183807373,
"learning_rate": 1.4199130335738981e-05,
"loss": 0.5357,
"step": 41800
},
{
"ar_loss": 0.3745,
"epoch": 6.292083013066872,
"fm_loss": 0.1589,
"grad_norm": 4.1944804191589355,
"learning_rate": 1.409025833280655e-05,
"loss": 0.5334,
"step": 41900
},
{
"ar_loss": 0.3738,
"epoch": 6.307455803228286,
"fm_loss": 0.1589,
"grad_norm": 3.7318429946899414,
"learning_rate": 1.3981641306568299e-05,
"loss": 0.5328,
"step": 42000
},
{
"ar_loss": 0.3754,
"epoch": 6.3228285933897,
"fm_loss": 0.16,
"grad_norm": 1.8556092977523804,
"learning_rate": 1.3873281795563737e-05,
"loss": 0.5355,
"step": 42100
},
{
"ar_loss": 0.3718,
"epoch": 6.338201383551114,
"fm_loss": 0.1584,
"grad_norm": 3.1865854263305664,
"learning_rate": 1.3765182332313859e-05,
"loss": 0.5302,
"step": 42200
},
{
"ar_loss": 0.3778,
"epoch": 6.353574173712529,
"fm_loss": 0.1593,
"grad_norm": 2.2085890769958496,
"learning_rate": 1.3657345443261967e-05,
"loss": 0.5371,
"step": 42300
},
{
"ar_loss": 0.377,
"epoch": 6.3689469638739435,
"fm_loss": 0.1592,
"grad_norm": 4.625204563140869,
"learning_rate": 1.3549773648714631e-05,
"loss": 0.5363,
"step": 42400
},
{
"ar_loss": 0.3775,
"epoch": 6.384319754035357,
"fm_loss": 0.1581,
"grad_norm": 2.3030028343200684,
"learning_rate": 1.3442469462782741e-05,
"loss": 0.5356,
"step": 42500
},
{
"ar_loss": 0.3768,
"epoch": 6.399692544196772,
"fm_loss": 0.1591,
"grad_norm": 6.936190128326416,
"learning_rate": 1.3335435393322826e-05,
"loss": 0.5359,
"step": 42600
},
{
"ar_loss": 0.374,
"epoch": 6.415065334358186,
"fm_loss": 0.1591,
"grad_norm": 3.345165252685547,
"learning_rate": 1.322867394187836e-05,
"loss": 0.5331,
"step": 42700
},
{
"ar_loss": 0.377,
"epoch": 6.4304381245196005,
"fm_loss": 0.1595,
"grad_norm": 3.3900644779205322,
"learning_rate": 1.3122187603621356e-05,
"loss": 0.5366,
"step": 42800
},
{
"ar_loss": 0.3728,
"epoch": 6.445810914681014,
"fm_loss": 0.1582,
"grad_norm": 2.5638771057128906,
"learning_rate": 1.3015978867293996e-05,
"loss": 0.531,
"step": 42900
},
{
"ar_loss": 0.377,
"epoch": 6.461183704842429,
"fm_loss": 0.1588,
"grad_norm": 2.6694748401641846,
"learning_rate": 1.2910050215150526e-05,
"loss": 0.5358,
"step": 43000
},
{
"ar_loss": 0.3739,
"epoch": 6.476556495003843,
"fm_loss": 0.1593,
"grad_norm": 4.689158916473389,
"learning_rate": 1.2804404122899197e-05,
"loss": 0.5332,
"step": 43100
},
{
"ar_loss": 0.3763,
"epoch": 6.4919292851652575,
"fm_loss": 0.1589,
"grad_norm": 2.5251529216766357,
"learning_rate": 1.2699043059644444e-05,
"loss": 0.5352,
"step": 43200
},
{
"ar_loss": 0.3754,
"epoch": 6.507302075326672,
"fm_loss": 0.1582,
"grad_norm": 2.896928548812866,
"learning_rate": 1.2593969487829133e-05,
"loss": 0.5337,
"step": 43300
},
{
"ar_loss": 0.3782,
"epoch": 6.522674865488086,
"fm_loss": 0.1589,
"grad_norm": 8.51708698272705,
"learning_rate": 1.2489185863177032e-05,
"loss": 0.5371,
"step": 43400
},
{
"ar_loss": 0.3763,
"epoch": 6.538047655649501,
"fm_loss": 0.1584,
"grad_norm": 2.936885356903076,
"learning_rate": 1.2384694634635433e-05,
"loss": 0.5347,
"step": 43500
},
{
"ar_loss": 0.3744,
"epoch": 6.553420445810914,
"fm_loss": 0.1592,
"grad_norm": 2.76309871673584,
"learning_rate": 1.2280498244317883e-05,
"loss": 0.5337,
"step": 43600
},
{
"ar_loss": 0.3742,
"epoch": 6.568793235972329,
"fm_loss": 0.1586,
"grad_norm": 2.100710868835449,
"learning_rate": 1.2176599127447147e-05,
"loss": 0.5328,
"step": 43700
},
{
"ar_loss": 0.3777,
"epoch": 6.584166026133743,
"fm_loss": 0.1588,
"grad_norm": 2.9072883129119873,
"learning_rate": 1.2072999712298242e-05,
"loss": 0.5365,
"step": 43800
},
{
"ar_loss": 0.3757,
"epoch": 6.599538816295158,
"fm_loss": 0.1627,
"grad_norm": 2.924342393875122,
"learning_rate": 1.196970242014176e-05,
"loss": 0.5384,
"step": 43900
},
{
"ar_loss": 0.3763,
"epoch": 6.614911606456571,
"fm_loss": 0.1586,
"grad_norm": 2.559717893600464,
"learning_rate": 1.1866709665187205e-05,
"loss": 0.5349,
"step": 44000
},
{
"ar_loss": 0.3732,
"epoch": 6.630284396617986,
"fm_loss": 0.1588,
"grad_norm": 6.084836006164551,
"learning_rate": 1.1764023854526593e-05,
"loss": 0.532,
"step": 44100
},
{
"ar_loss": 0.3753,
"epoch": 6.645657186779401,
"fm_loss": 0.1588,
"grad_norm": 2.4638426303863525,
"learning_rate": 1.1661647388078211e-05,
"loss": 0.5341,
"step": 44200
},
{
"ar_loss": 0.3742,
"epoch": 6.661029976940815,
"fm_loss": 0.1594,
"grad_norm": 2.2063241004943848,
"learning_rate": 1.1559582658530526e-05,
"loss": 0.5335,
"step": 44300
},
{
"ar_loss": 0.3743,
"epoch": 6.676402767102229,
"fm_loss": 0.1591,
"grad_norm": 2.847790479660034,
"learning_rate": 1.1457832051286235e-05,
"loss": 0.5333,
"step": 44400
},
{
"ar_loss": 0.3724,
"epoch": 6.691775557263643,
"fm_loss": 0.1591,
"grad_norm": 5.002224445343018,
"learning_rate": 1.1356397944406566e-05,
"loss": 0.5315,
"step": 44500
},
{
"ar_loss": 0.3739,
"epoch": 6.707148347425058,
"fm_loss": 0.1592,
"grad_norm": 2.6647820472717285,
"learning_rate": 1.125528270855564e-05,
"loss": 0.5331,
"step": 44600
},
{
"ar_loss": 0.3758,
"epoch": 6.722521137586472,
"fm_loss": 0.1583,
"grad_norm": 2.1632769107818604,
"learning_rate": 1.1154488706945104e-05,
"loss": 0.5341,
"step": 44700
},
{
"ar_loss": 0.3761,
"epoch": 6.737893927747886,
"fm_loss": 0.1589,
"grad_norm": 2.196767568588257,
"learning_rate": 1.105401829527889e-05,
"loss": 0.535,
"step": 44800
},
{
"ar_loss": 0.3755,
"epoch": 6.753266717909301,
"fm_loss": 0.1585,
"grad_norm": 3.3876090049743652,
"learning_rate": 1.0953873821698153e-05,
"loss": 0.534,
"step": 44900
},
{
"ar_loss": 0.375,
"epoch": 6.768639508070715,
"fm_loss": 0.1587,
"grad_norm": 2.2706313133239746,
"learning_rate": 1.085405762672639e-05,
"loss": 0.5337,
"step": 45000
},
{
"ar_loss": 0.3739,
"epoch": 6.784012298232129,
"fm_loss": 0.159,
"grad_norm": 2.5273022651672363,
"learning_rate": 1.0754572043214773e-05,
"loss": 0.5329,
"step": 45100
},
{
"ar_loss": 0.3734,
"epoch": 6.799385088393543,
"fm_loss": 0.1596,
"grad_norm": 2.579967498779297,
"learning_rate": 1.0655419396287578e-05,
"loss": 0.533,
"step": 45200
},
{
"ar_loss": 0.3717,
"epoch": 6.814757878554958,
"fm_loss": 0.1583,
"grad_norm": 36.556278228759766,
"learning_rate": 1.0556602003287847e-05,
"loss": 0.53,
"step": 45300
},
{
"ar_loss": 0.3769,
"epoch": 6.830130668716372,
"fm_loss": 0.1586,
"grad_norm": 5.545002460479736,
"learning_rate": 1.045812217372327e-05,
"loss": 0.5355,
"step": 45400
},
{
"ar_loss": 0.3737,
"epoch": 6.845503458877786,
"fm_loss": 0.1587,
"grad_norm": 2.081123113632202,
"learning_rate": 1.0359982209212178e-05,
"loss": 0.5323,
"step": 45500
},
{
"ar_loss": 0.3722,
"epoch": 6.860876249039201,
"fm_loss": 0.1591,
"grad_norm": 3.176176071166992,
"learning_rate": 1.0262184403429739e-05,
"loss": 0.5313,
"step": 45600
},
{
"ar_loss": 0.3785,
"epoch": 6.876249039200615,
"fm_loss": 0.1584,
"grad_norm": 7.945939540863037,
"learning_rate": 1.016473104205441e-05,
"loss": 0.5369,
"step": 45700
},
{
"ar_loss": 0.3752,
"epoch": 6.8916218293620295,
"fm_loss": 0.1589,
"grad_norm": 3.0679447650909424,
"learning_rate": 1.0067624402714438e-05,
"loss": 0.5341,
"step": 45800
},
{
"ar_loss": 0.3731,
"epoch": 6.906994619523443,
"fm_loss": 0.159,
"grad_norm": 4.075259685516357,
"learning_rate": 9.970866754934677e-06,
"loss": 0.532,
"step": 45900
},
{
"ar_loss": 0.3768,
"epoch": 6.922367409684858,
"fm_loss": 0.1581,
"grad_norm": 2.1700098514556885,
"learning_rate": 9.874460360083537e-06,
"loss": 0.5349,
"step": 46000
},
{
"ar_loss": 0.3749,
"epoch": 6.937740199846272,
"fm_loss": 0.1598,
"grad_norm": 2.1425936222076416,
"learning_rate": 9.778407471320134e-06,
"loss": 0.5347,
"step": 46100
},
{
"ar_loss": 0.3719,
"epoch": 6.9531129900076865,
"fm_loss": 0.1584,
"grad_norm": 7.313533782958984,
"learning_rate": 9.682710333541622e-06,
"loss": 0.5303,
"step": 46200
},
{
"ar_loss": 0.3737,
"epoch": 6.9684857801691,
"fm_loss": 0.1586,
"grad_norm": 1.9248658418655396,
"learning_rate": 9.587371183330723e-06,
"loss": 0.5323,
"step": 46300
},
{
"ar_loss": 0.3728,
"epoch": 6.983858570330515,
"fm_loss": 0.1596,
"grad_norm": 2.7737770080566406,
"learning_rate": 9.492392248903505e-06,
"loss": 0.5324,
"step": 46400
},
{
"ar_loss": 0.3755,
"epoch": 6.99923136049193,
"fm_loss": 0.1585,
"grad_norm": 2.6747500896453857,
"learning_rate": 9.397775750057206e-06,
"loss": 0.5341,
"step": 46500
},
{
"ar_loss": 0.3746,
"epoch": 7.014604150653343,
"fm_loss": 0.1594,
"grad_norm": 6.737438678741455,
"learning_rate": 9.303523898118444e-06,
"loss": 0.534,
"step": 46600
},
{
"ar_loss": 0.3741,
"epoch": 7.029976940814758,
"fm_loss": 0.1589,
"grad_norm": 3.4723405838012695,
"learning_rate": 9.209638895891501e-06,
"loss": 0.5331,
"step": 46700
},
{
"ar_loss": 0.3735,
"epoch": 7.045349730976172,
"fm_loss": 0.1594,
"grad_norm": 6.770826816558838,
"learning_rate": 9.116122937606835e-06,
"loss": 0.5329,
"step": 46800
},
{
"ar_loss": 0.3735,
"epoch": 7.060722521137587,
"fm_loss": 0.1589,
"grad_norm": 11.931926727294922,
"learning_rate": 9.022978208869808e-06,
"loss": 0.5323,
"step": 46900
},
{
"ar_loss": 0.3734,
"epoch": 7.076095311299,
"fm_loss": 0.1582,
"grad_norm": 1.7415313720703125,
"learning_rate": 8.930206886609616e-06,
"loss": 0.5316,
"step": 47000
},
{
"ar_loss": 0.3734,
"epoch": 7.091468101460415,
"fm_loss": 0.1591,
"grad_norm": 3.001337766647339,
"learning_rate": 8.837811139028377e-06,
"loss": 0.5325,
"step": 47100
},
{
"ar_loss": 0.3719,
"epoch": 7.10684089162183,
"fm_loss": 0.1588,
"grad_norm": 2.8166637420654297,
"learning_rate": 8.745793125550477e-06,
"loss": 0.5307,
"step": 47200
},
{
"ar_loss": 0.3744,
"epoch": 7.1222136817832435,
"fm_loss": 0.1588,
"grad_norm": 10.10988712310791,
"learning_rate": 8.654154996772114e-06,
"loss": 0.5332,
"step": 47300
},
{
"ar_loss": 0.3758,
"epoch": 7.137586471944658,
"fm_loss": 0.1593,
"grad_norm": 2.6831390857696533,
"learning_rate": 8.562898894411017e-06,
"loss": 0.5351,
"step": 47400
},
{
"ar_loss": 0.371,
"epoch": 7.152959262106072,
"fm_loss": 0.1592,
"grad_norm": 1.959746241569519,
"learning_rate": 8.472026951256381e-06,
"loss": 0.5302,
"step": 47500
},
{
"ar_loss": 0.3719,
"epoch": 7.168332052267487,
"fm_loss": 0.1582,
"grad_norm": 2.5866353511810303,
"learning_rate": 8.38154129111908e-06,
"loss": 0.53,
"step": 47600
},
{
"ar_loss": 0.3731,
"epoch": 7.1837048424289005,
"fm_loss": 0.1584,
"grad_norm": 2.6503565311431885,
"learning_rate": 8.29144402878193e-06,
"loss": 0.5314,
"step": 47700
},
{
"ar_loss": 0.372,
"epoch": 7.199077632590315,
"fm_loss": 0.1606,
"grad_norm": 3.779261589050293,
"learning_rate": 8.201737269950355e-06,
"loss": 0.5325,
"step": 47800
},
{
"ar_loss": 0.371,
"epoch": 7.21445042275173,
"fm_loss": 0.1585,
"grad_norm": 2.4449970722198486,
"learning_rate": 8.112423111203124e-06,
"loss": 0.5295,
"step": 47900
},
{
"ar_loss": 0.3738,
"epoch": 7.229823212913144,
"fm_loss": 0.1584,
"grad_norm": 2.2168331146240234,
"learning_rate": 8.023503639943378e-06,
"loss": 0.5322,
"step": 48000
},
{
"ar_loss": 0.3758,
"epoch": 7.245196003074558,
"fm_loss": 0.158,
"grad_norm": 2.078047275543213,
"learning_rate": 7.934980934349811e-06,
"loss": 0.5338,
"step": 48100
},
{
"ar_loss": 0.3757,
"epoch": 7.260568793235972,
"fm_loss": 0.1583,
"grad_norm": 2.491028308868408,
"learning_rate": 7.846857063328152e-06,
"loss": 0.534,
"step": 48200
},
{
"ar_loss": 0.3748,
"epoch": 7.275941583397387,
"fm_loss": 0.1584,
"grad_norm": 3.340336799621582,
"learning_rate": 7.759134086462753e-06,
"loss": 0.5332,
"step": 48300
},
{
"ar_loss": 0.3735,
"epoch": 7.291314373558801,
"fm_loss": 0.1584,
"grad_norm": 3.3799524307250977,
"learning_rate": 7.671814053968484e-06,
"loss": 0.5319,
"step": 48400
},
{
"ar_loss": 0.3743,
"epoch": 7.306687163720215,
"fm_loss": 0.1587,
"grad_norm": 3.1465113162994385,
"learning_rate": 7.58489900664282e-06,
"loss": 0.533,
"step": 48500
},
{
"ar_loss": 0.3723,
"epoch": 7.322059953881629,
"fm_loss": 0.1613,
"grad_norm": 3.0010464191436768,
"learning_rate": 7.49839097581814e-06,
"loss": 0.5336,
"step": 48600
},
{
"ar_loss": 0.3727,
"epoch": 7.337432744043044,
"fm_loss": 0.1608,
"grad_norm": 8.70785903930664,
"learning_rate": 7.412291983314237e-06,
"loss": 0.5335,
"step": 48700
},
{
"ar_loss": 0.3727,
"epoch": 7.3528055342044585,
"fm_loss": 0.158,
"grad_norm": 3.002357244491577,
"learning_rate": 7.326604041391089e-06,
"loss": 0.5307,
"step": 48800
},
{
"ar_loss": 0.3715,
"epoch": 7.368178324365872,
"fm_loss": 0.1585,
"grad_norm": 2.6171886920928955,
"learning_rate": 7.241329152701812e-06,
"loss": 0.53,
"step": 48900
},
{
"ar_loss": 0.3744,
"epoch": 7.383551114527287,
"fm_loss": 0.1583,
"grad_norm": 2.283306360244751,
"learning_rate": 7.156469310245864e-06,
"loss": 0.5326,
"step": 49000
},
{
"ar_loss": 0.3744,
"epoch": 7.398923904688701,
"fm_loss": 0.1595,
"grad_norm": 6.2937331199646,
"learning_rate": 7.07202649732246e-06,
"loss": 0.5338,
"step": 49100
},
{
"ar_loss": 0.3744,
"epoch": 7.414296694850115,
"fm_loss": 0.1601,
"grad_norm": 4.580973148345947,
"learning_rate": 6.988002687484222e-06,
"loss": 0.5345,
"step": 49200
},
{
"ar_loss": 0.3728,
"epoch": 7.429669485011529,
"fm_loss": 0.158,
"grad_norm": 2.2313263416290283,
"learning_rate": 6.904399844491058e-06,
"loss": 0.5308,
"step": 49300
},
{
"ar_loss": 0.3735,
"epoch": 7.445042275172944,
"fm_loss": 0.1595,
"grad_norm": 3.6399624347686768,
"learning_rate": 6.821219922264252e-06,
"loss": 0.533,
"step": 49400
},
{
"ar_loss": 0.3752,
"epoch": 7.460415065334358,
"fm_loss": 0.1585,
"grad_norm": 3.077457904815674,
"learning_rate": 6.73846486484083e-06,
"loss": 0.5337,
"step": 49500
},
{
"ar_loss": 0.3743,
"epoch": 7.475787855495772,
"fm_loss": 0.1581,
"grad_norm": 2.1495542526245117,
"learning_rate": 6.6561366063280784e-06,
"loss": 0.5324,
"step": 49600
},
{
"ar_loss": 0.3727,
"epoch": 7.491160645657187,
"fm_loss": 0.1584,
"grad_norm": 4.248561382293701,
"learning_rate": 6.574237070858383e-06,
"loss": 0.5311,
"step": 49700
},
{
"ar_loss": 0.3725,
"epoch": 7.506533435818601,
"fm_loss": 0.1588,
"grad_norm": 1.8635345697402954,
"learning_rate": 6.492768172544231e-06,
"loss": 0.5314,
"step": 49800
},
{
"ar_loss": 0.3739,
"epoch": 7.521906225980016,
"fm_loss": 0.159,
"grad_norm": 4.352962970733643,
"learning_rate": 6.411731815433492e-06,
"loss": 0.5329,
"step": 49900
},
{
"ar_loss": 0.3744,
"epoch": 7.537279016141429,
"fm_loss": 0.1597,
"grad_norm": 1.9769291877746582,
"learning_rate": 6.33112989346491e-06,
"loss": 0.5341,
"step": 50000
},
{
"ar_loss": 0.3716,
"epoch": 7.552651806302844,
"fm_loss": 0.1728,
"grad_norm": 2.1171858310699463,
"learning_rate": 6.250964290423847e-06,
"loss": 0.5445,
"step": 50100
},
{
"ar_loss": 0.3727,
"epoch": 7.568024596464259,
"fm_loss": 0.1592,
"grad_norm": 2.2346949577331543,
"learning_rate": 6.171236879898243e-06,
"loss": 0.5319,
"step": 50200
},
{
"ar_loss": 0.3733,
"epoch": 7.5833973866256725,
"fm_loss": 0.1606,
"grad_norm": 3.1455023288726807,
"learning_rate": 6.091949525234838e-06,
"loss": 0.5339,
"step": 50300
},
{
"ar_loss": 0.3767,
"epoch": 7.598770176787087,
"fm_loss": 0.1583,
"grad_norm": 4.480979919433594,
"learning_rate": 6.013104079495621e-06,
"loss": 0.535,
"step": 50400
},
{
"ar_loss": 0.3706,
"epoch": 7.614142966948501,
"fm_loss": 0.1593,
"grad_norm": 3.0612711906433105,
"learning_rate": 5.934702385414517e-06,
"loss": 0.5299,
"step": 50500
},
{
"ar_loss": 0.3729,
"epoch": 7.629515757109916,
"fm_loss": 0.1585,
"grad_norm": 2.3492562770843506,
"learning_rate": 5.856746275354322e-06,
"loss": 0.5313,
"step": 50600
},
{
"ar_loss": 0.3735,
"epoch": 7.6448885472713295,
"fm_loss": 0.1585,
"grad_norm": 4.781151294708252,
"learning_rate": 5.77923757126389e-06,
"loss": 0.532,
"step": 50700
},
{
"ar_loss": 0.3714,
"epoch": 7.660261337432744,
"fm_loss": 0.159,
"grad_norm": 4.14220666885376,
"learning_rate": 5.702178084635526e-06,
"loss": 0.5304,
"step": 50800
},
{
"ar_loss": 0.3755,
"epoch": 7.675634127594158,
"fm_loss": 0.1586,
"grad_norm": 2.7182235717773438,
"learning_rate": 5.625569616462672e-06,
"loss": 0.5341,
"step": 50900
},
{
"ar_loss": 0.375,
"epoch": 7.691006917755573,
"fm_loss": 0.1587,
"grad_norm": 2.1390786170959473,
"learning_rate": 5.549413957197797e-06,
"loss": 0.5337,
"step": 51000
},
{
"ar_loss": 0.3757,
"epoch": 7.706379707916987,
"fm_loss": 0.1591,
"grad_norm": 15.834261894226074,
"learning_rate": 5.473712886710569e-06,
"loss": 0.5348,
"step": 51100
},
{
"ar_loss": 0.3752,
"epoch": 7.721752498078401,
"fm_loss": 0.1587,
"grad_norm": 3.1994242668151855,
"learning_rate": 5.3984681742462435e-06,
"loss": 0.5339,
"step": 51200
},
{
"ar_loss": 0.3728,
"epoch": 7.737125288239816,
"fm_loss": 0.1584,
"grad_norm": 3.1270108222961426,
"learning_rate": 5.323681578384318e-06,
"loss": 0.5312,
"step": 51300
},
{
"ar_loss": 0.3753,
"epoch": 7.75249807840123,
"fm_loss": 0.1588,
"grad_norm": 4.753870964050293,
"learning_rate": 5.24935484699744e-06,
"loss": 0.5341,
"step": 51400
},
{
"ar_loss": 0.3751,
"epoch": 7.767870868562644,
"fm_loss": 0.1591,
"grad_norm": 2.4398574829101562,
"learning_rate": 5.175489717210532e-06,
"loss": 0.5343,
"step": 51500
},
{
"ar_loss": 0.3723,
"epoch": 7.783243658724058,
"fm_loss": 0.1603,
"grad_norm": 1.8831534385681152,
"learning_rate": 5.102087915360229e-06,
"loss": 0.5326,
"step": 51600
},
{
"ar_loss": 0.3731,
"epoch": 7.798616448885473,
"fm_loss": 0.1579,
"grad_norm": 3.3795456886291504,
"learning_rate": 5.0291511569544955e-06,
"loss": 0.5311,
"step": 51700
},
{
"ar_loss": 0.3724,
"epoch": 7.813989239046887,
"fm_loss": 0.159,
"grad_norm": 6.143826007843018,
"learning_rate": 4.956681146632553e-06,
"loss": 0.5314,
"step": 51800
},
{
"ar_loss": 0.3737,
"epoch": 7.829362029208301,
"fm_loss": 0.1575,
"grad_norm": 3.264896869659424,
"learning_rate": 4.884679578125029e-06,
"loss": 0.5312,
"step": 51900
},
{
"ar_loss": 0.3744,
"epoch": 7.844734819369716,
"fm_loss": 0.1608,
"grad_norm": 2.4835152626037598,
"learning_rate": 4.813148134214396e-06,
"loss": 0.5352,
"step": 52000
},
{
"ar_loss": 0.3762,
"epoch": 7.86010760953113,
"fm_loss": 0.1581,
"grad_norm": 2.550990581512451,
"learning_rate": 4.742088486695604e-06,
"loss": 0.5343,
"step": 52100
},
{
"ar_loss": 0.3731,
"epoch": 7.875480399692544,
"fm_loss": 0.1575,
"grad_norm": 6.491451740264893,
"learning_rate": 4.671502296337033e-06,
"loss": 0.5307,
"step": 52200
},
{
"ar_loss": 0.3729,
"epoch": 7.890853189853958,
"fm_loss": 0.1588,
"grad_norm": 3.544994354248047,
"learning_rate": 4.60139121284168e-06,
"loss": 0.5317,
"step": 52300
},
{
"ar_loss": 0.3753,
"epoch": 7.906225980015373,
"fm_loss": 0.1597,
"grad_norm": 2.540804386138916,
"learning_rate": 4.531756874808585e-06,
"loss": 0.535,
"step": 52400
},
{
"ar_loss": 0.375,
"epoch": 7.921598770176787,
"fm_loss": 0.1583,
"grad_norm": 3.565279722213745,
"learning_rate": 4.462600909694559e-06,
"loss": 0.5334,
"step": 52500
},
{
"ar_loss": 0.3729,
"epoch": 7.936971560338201,
"fm_loss": 0.1593,
"grad_norm": 2.623326539993286,
"learning_rate": 4.393924933776122e-06,
"loss": 0.5321,
"step": 52600
},
{
"ar_loss": 0.3763,
"epoch": 7.952344350499615,
"fm_loss": 0.158,
"grad_norm": 3.847337007522583,
"learning_rate": 4.325730552111754e-06,
"loss": 0.5343,
"step": 52700
},
{
"ar_loss": 0.3734,
"epoch": 7.96771714066103,
"fm_loss": 0.1577,
"grad_norm": 2.1040573120117188,
"learning_rate": 4.258019358504359e-06,
"loss": 0.5312,
"step": 52800
},
{
"ar_loss": 0.3727,
"epoch": 7.983089930822445,
"fm_loss": 0.1585,
"grad_norm": 2.5626471042633057,
"learning_rate": 4.190792935464033e-06,
"loss": 0.5311,
"step": 52900
},
{
"ar_loss": 0.3735,
"epoch": 7.998462720983858,
"fm_loss": 0.16,
"grad_norm": 2.844808340072632,
"learning_rate": 4.124052854171068e-06,
"loss": 0.5335,
"step": 53000
},
{
"ar_loss": 0.3713,
"epoch": 8.013835511145272,
"fm_loss": 0.1589,
"grad_norm": 3.6812970638275146,
"learning_rate": 4.057800674439227e-06,
"loss": 0.5302,
"step": 53100
},
{
"ar_loss": 0.3762,
"epoch": 8.029208301306687,
"fm_loss": 0.158,
"grad_norm": 3.114596128463745,
"learning_rate": 3.992037944679322e-06,
"loss": 0.5342,
"step": 53200
},
{
"ar_loss": 0.3731,
"epoch": 8.044581091468102,
"fm_loss": 0.1582,
"grad_norm": 7.761219501495361,
"learning_rate": 3.926766201862972e-06,
"loss": 0.5313,
"step": 53300
},
{
"ar_loss": 0.3736,
"epoch": 8.059953881629516,
"fm_loss": 0.1572,
"grad_norm": 2.6152584552764893,
"learning_rate": 3.861986971486725e-06,
"loss": 0.5307,
"step": 53400
},
{
"ar_loss": 0.3762,
"epoch": 8.07532667179093,
"fm_loss": 0.1578,
"grad_norm": 10.676106452941895,
"learning_rate": 3.7977017675363826e-06,
"loss": 0.5339,
"step": 53500
},
{
"ar_loss": 0.3778,
"epoch": 8.090699461952344,
"fm_loss": 0.1581,
"grad_norm": 2.382251024246216,
"learning_rate": 3.7339120924516276e-06,
"loss": 0.5359,
"step": 53600
},
{
"ar_loss": 0.3755,
"epoch": 8.106072252113758,
"fm_loss": 0.1586,
"grad_norm": 2.832390308380127,
"learning_rate": 3.6706194370909025e-06,
"loss": 0.5341,
"step": 53700
},
{
"ar_loss": 0.3725,
"epoch": 8.121445042275173,
"fm_loss": 0.1585,
"grad_norm": 2.2325046062469482,
"learning_rate": 3.6078252806965667e-06,
"loss": 0.531,
"step": 53800
},
{
"ar_loss": 0.3749,
"epoch": 8.136817832436588,
"fm_loss": 0.1579,
"grad_norm": 2.05092716217041,
"learning_rate": 3.5455310908603293e-06,
"loss": 0.5328,
"step": 53900
},
{
"ar_loss": 0.3705,
"epoch": 8.152190622598,
"fm_loss": 0.1623,
"grad_norm": 2.234282970428467,
"learning_rate": 3.4837383234889498e-06,
"loss": 0.5328,
"step": 54000
},
{
"ar_loss": 0.3727,
"epoch": 8.167563412759415,
"fm_loss": 0.1582,
"grad_norm": 2.087803363800049,
"learning_rate": 3.422448422770197e-06,
"loss": 0.5309,
"step": 54100
},
{
"ar_loss": 0.3737,
"epoch": 8.18293620292083,
"fm_loss": 0.1579,
"grad_norm": 2.0325510501861572,
"learning_rate": 3.3616628211391193e-06,
"loss": 0.5316,
"step": 54200
},
{
"ar_loss": 0.3734,
"epoch": 8.198308993082245,
"fm_loss": 0.1578,
"grad_norm": 4.768230438232422,
"learning_rate": 3.3013829392445434e-06,
"loss": 0.5312,
"step": 54300
},
{
"ar_loss": 0.371,
"epoch": 8.21368178324366,
"fm_loss": 0.1578,
"grad_norm": 9.634552955627441,
"learning_rate": 3.2416101859158887e-06,
"loss": 0.5289,
"step": 54400
},
{
"ar_loss": 0.3733,
"epoch": 8.229054573405072,
"fm_loss": 0.16,
"grad_norm": 2.277290105819702,
"learning_rate": 3.1823459581302394e-06,
"loss": 0.5333,
"step": 54500
},
{
"ar_loss": 0.3716,
"epoch": 8.244427363566487,
"fm_loss": 0.158,
"grad_norm": 6.1840996742248535,
"learning_rate": 3.123591640979681e-06,
"loss": 0.5296,
"step": 54600
},
{
"ar_loss": 0.3735,
"epoch": 8.259800153727902,
"fm_loss": 0.1592,
"grad_norm": 3.6673200130462646,
"learning_rate": 3.065348607638946e-06,
"loss": 0.5327,
"step": 54700
},
{
"ar_loss": 0.3722,
"epoch": 8.275172943889316,
"fm_loss": 0.1583,
"grad_norm": 2.28403902053833,
"learning_rate": 3.0076182193333053e-06,
"loss": 0.5306,
"step": 54800
},
{
"ar_loss": 0.3706,
"epoch": 8.29054573405073,
"fm_loss": 0.1588,
"grad_norm": 2.709955930709839,
"learning_rate": 2.9504018253067673e-06,
"loss": 0.5294,
"step": 54900
},
{
"ar_loss": 0.3732,
"epoch": 8.305918524212144,
"fm_loss": 0.1583,
"grad_norm": 2.438748359680176,
"learning_rate": 2.8937007627905354e-06,
"loss": 0.5314,
"step": 55000
},
{
"ar_loss": 0.3735,
"epoch": 8.321291314373559,
"fm_loss": 0.1589,
"grad_norm": 1.9475317001342773,
"learning_rate": 2.8375163569717645e-06,
"loss": 0.5324,
"step": 55100
},
{
"ar_loss": 0.375,
"epoch": 8.336664104534973,
"fm_loss": 0.1587,
"grad_norm": 2.430262804031372,
"learning_rate": 2.781849920962576e-06,
"loss": 0.5337,
"step": 55200
},
{
"ar_loss": 0.3742,
"epoch": 8.352036894696388,
"fm_loss": 0.1588,
"grad_norm": 3.527625799179077,
"learning_rate": 2.726702755769381e-06,
"loss": 0.5329,
"step": 55300
},
{
"ar_loss": 0.3744,
"epoch": 8.367409684857801,
"fm_loss": 0.1584,
"grad_norm": 2.9451231956481934,
"learning_rate": 2.6720761502624674e-06,
"loss": 0.5328,
"step": 55400
},
{
"ar_loss": 0.3716,
"epoch": 8.382782475019216,
"fm_loss": 0.1599,
"grad_norm": 4.992424488067627,
"learning_rate": 2.6179713811458726e-06,
"loss": 0.5315,
"step": 55500
},
{
"ar_loss": 0.3727,
"epoch": 8.39815526518063,
"fm_loss": 0.1585,
"grad_norm": 4.794875144958496,
"learning_rate": 2.564389712927556e-06,
"loss": 0.5313,
"step": 55600
},
{
"ar_loss": 0.3728,
"epoch": 8.413528055342045,
"fm_loss": 0.1608,
"grad_norm": 7.288453102111816,
"learning_rate": 2.5113323978898455e-06,
"loss": 0.5336,
"step": 55700
},
{
"ar_loss": 0.3732,
"epoch": 8.42890084550346,
"fm_loss": 0.1583,
"grad_norm": 2.7333781719207764,
"learning_rate": 2.458800676060158e-06,
"loss": 0.5316,
"step": 55800
},
{
"ar_loss": 0.3737,
"epoch": 8.444273635664873,
"fm_loss": 0.1584,
"grad_norm": 2.3420047760009766,
"learning_rate": 2.406795775182025e-06,
"loss": 0.532,
"step": 55900
},
{
"ar_loss": 0.3742,
"epoch": 8.459646425826287,
"fm_loss": 0.1583,
"grad_norm": 2.3550283908843994,
"learning_rate": 2.355318910686394e-06,
"loss": 0.5325,
"step": 56000
},
{
"ar_loss": 0.3755,
"epoch": 8.475019215987702,
"fm_loss": 0.1609,
"grad_norm": 2.22489595413208,
"learning_rate": 2.304371285663237e-06,
"loss": 0.5364,
"step": 56100
},
{
"ar_loss": 0.3723,
"epoch": 8.490392006149117,
"fm_loss": 0.1578,
"grad_norm": 12.48835563659668,
"learning_rate": 2.2539540908334157e-06,
"loss": 0.5301,
"step": 56200
},
{
"ar_loss": 0.3729,
"epoch": 8.50576479631053,
"fm_loss": 0.159,
"grad_norm": 2.215409994125366,
"learning_rate": 2.204068504520859e-06,
"loss": 0.5319,
"step": 56300
},
{
"ar_loss": 0.3739,
"epoch": 8.521137586471944,
"fm_loss": 0.1578,
"grad_norm": 3.4783434867858887,
"learning_rate": 2.15471569262502e-06,
"loss": 0.5317,
"step": 56400
},
{
"ar_loss": 0.3721,
"epoch": 8.536510376633359,
"fm_loss": 0.1576,
"grad_norm": 2.0251822471618652,
"learning_rate": 2.1058968085936383e-06,
"loss": 0.5297,
"step": 56500
},
{
"ar_loss": 0.374,
"epoch": 8.551883166794774,
"fm_loss": 0.1583,
"grad_norm": 11.012389183044434,
"learning_rate": 2.0576129933957715e-06,
"loss": 0.5323,
"step": 56600
},
{
"ar_loss": 0.373,
"epoch": 8.567255956956188,
"fm_loss": 0.1579,
"grad_norm": 2.4049017429351807,
"learning_rate": 2.009865375495129e-06,
"loss": 0.531,
"step": 56700
},
{
"ar_loss": 0.3748,
"epoch": 8.582628747117601,
"fm_loss": 0.1591,
"grad_norm": 2.692699909210205,
"learning_rate": 1.9626550708237075e-06,
"loss": 0.5339,
"step": 56800
},
{
"ar_loss": 0.3729,
"epoch": 8.598001537279016,
"fm_loss": 0.1586,
"grad_norm": 2.713548183441162,
"learning_rate": 1.915983182755696e-06,
"loss": 0.5315,
"step": 56900
},
{
"ar_loss": 0.3711,
"epoch": 8.61337432744043,
"fm_loss": 0.1575,
"grad_norm": 6.727135181427002,
"learning_rate": 1.8698508020817045e-06,
"loss": 0.5286,
"step": 57000
},
{
"ar_loss": 0.3742,
"epoch": 8.628747117601845,
"fm_loss": 0.1583,
"grad_norm": 3.071518659591675,
"learning_rate": 1.8242590069832617e-06,
"loss": 0.5325,
"step": 57100
},
{
"ar_loss": 0.3712,
"epoch": 8.644119907763258,
"fm_loss": 0.1584,
"grad_norm": 2.538884401321411,
"learning_rate": 1.7792088630076086e-06,
"loss": 0.5296,
"step": 57200
},
{
"ar_loss": 0.374,
"epoch": 8.659492697924673,
"fm_loss": 0.1583,
"grad_norm": 2.6885924339294434,
"learning_rate": 1.7347014230428144e-06,
"loss": 0.5323,
"step": 57300
},
{
"ar_loss": 0.3721,
"epoch": 8.674865488086088,
"fm_loss": 0.1579,
"grad_norm": 2.660156726837158,
"learning_rate": 1.6907377272931485e-06,
"loss": 0.53,
"step": 57400
},
{
"ar_loss": 0.3725,
"epoch": 8.690238278247502,
"fm_loss": 0.1586,
"grad_norm": 2.4190871715545654,
"learning_rate": 1.6473188032547854e-06,
"loss": 0.5311,
"step": 57500
},
{
"ar_loss": 0.3711,
"epoch": 8.705611068408917,
"fm_loss": 0.1581,
"grad_norm": 3.2725918292999268,
"learning_rate": 1.6044456656917839e-06,
"loss": 0.5291,
"step": 57600
},
{
"ar_loss": 0.3725,
"epoch": 8.72098385857033,
"fm_loss": 0.1632,
"grad_norm": 4.782108783721924,
"learning_rate": 1.5621193166123648e-06,
"loss": 0.5357,
"step": 57700
},
{
"ar_loss": 0.3747,
"epoch": 8.736356648731745,
"fm_loss": 0.158,
"grad_norm": 7.434814929962158,
"learning_rate": 1.5203407452455076e-06,
"loss": 0.5327,
"step": 57800
},
{
"ar_loss": 0.3704,
"epoch": 8.75172943889316,
"fm_loss": 0.1581,
"grad_norm": 2.1383039951324463,
"learning_rate": 1.4791109280178129e-06,
"loss": 0.5285,
"step": 57900
},
{
"ar_loss": 0.3772,
"epoch": 8.767102229054574,
"fm_loss": 0.158,
"grad_norm": 26.415754318237305,
"learning_rate": 1.4384308285306959e-06,
"loss": 0.5352,
"step": 58000
},
{
"ar_loss": 0.3721,
"epoch": 8.782475019215987,
"fm_loss": 0.1575,
"grad_norm": 8.601106643676758,
"learning_rate": 1.3983013975378574e-06,
"loss": 0.5296,
"step": 58100
},
{
"ar_loss": 0.372,
"epoch": 8.797847809377402,
"fm_loss": 0.1584,
"grad_norm": 2.2945618629455566,
"learning_rate": 1.3587235729230764e-06,
"loss": 0.5303,
"step": 58200
},
{
"ar_loss": 0.3747,
"epoch": 8.813220599538816,
"fm_loss": 0.1584,
"grad_norm": 4.096945762634277,
"learning_rate": 1.3196982796782636e-06,
"loss": 0.5331,
"step": 58300
},
{
"ar_loss": 0.3752,
"epoch": 8.82859338970023,
"fm_loss": 0.1599,
"grad_norm": 3.189495801925659,
"learning_rate": 1.2812264298818737e-06,
"loss": 0.5351,
"step": 58400
},
{
"ar_loss": 0.3725,
"epoch": 8.843966179861646,
"fm_loss": 0.1587,
"grad_norm": 3.499967098236084,
"learning_rate": 1.2433089226775662e-06,
"loss": 0.5312,
"step": 58500
},
{
"ar_loss": 0.3716,
"epoch": 8.859338970023058,
"fm_loss": 0.1591,
"grad_norm": 2.2825992107391357,
"learning_rate": 1.2059466442532004e-06,
"loss": 0.5307,
"step": 58600
},
{
"ar_loss": 0.372,
"epoch": 8.874711760184473,
"fm_loss": 0.158,
"grad_norm": 3.344238042831421,
"learning_rate": 1.1691404678201317e-06,
"loss": 0.53,
"step": 58700
},
{
"ar_loss": 0.372,
"epoch": 8.890084550345888,
"fm_loss": 0.1588,
"grad_norm": 3.1672725677490234,
"learning_rate": 1.1328912535927828e-06,
"loss": 0.5308,
"step": 58800
},
{
"ar_loss": 0.3724,
"epoch": 8.905457340507303,
"fm_loss": 0.1581,
"grad_norm": 19.84259605407715,
"learning_rate": 1.0971998487685597e-06,
"loss": 0.5304,
"step": 58900
},
{
"ar_loss": 0.3746,
"epoch": 8.920830130668715,
"fm_loss": 0.1598,
"grad_norm": 3.0031962394714355,
"learning_rate": 1.0620670875080397e-06,
"loss": 0.5344,
"step": 59000
},
{
"ar_loss": 0.3723,
"epoch": 8.93620292083013,
"fm_loss": 0.159,
"grad_norm": 6.419062614440918,
"learning_rate": 1.0274937909154792e-06,
"loss": 0.5314,
"step": 59100
},
{
"ar_loss": 0.3777,
"epoch": 8.951575710991545,
"fm_loss": 0.1597,
"grad_norm": 2.2336511611938477,
"learning_rate": 9.934807670196223e-07,
"loss": 0.5373,
"step": 59200
},
{
"ar_loss": 0.3717,
"epoch": 8.96694850115296,
"fm_loss": 0.1585,
"grad_norm": 1.7860270738601685,
"learning_rate": 9.600288107548233e-07,
"loss": 0.5302,
"step": 59300
},
{
"ar_loss": 0.3732,
"epoch": 8.982321291314374,
"fm_loss": 0.1591,
"grad_norm": 2.5118651390075684,
"learning_rate": 9.271387039424456e-07,
"loss": 0.5322,
"step": 59400
},
{
"ar_loss": 0.3723,
"epoch": 8.997694081475787,
"fm_loss": 0.1586,
"grad_norm": 2.007235288619995,
"learning_rate": 8.948112152726285e-07,
"loss": 0.5308,
"step": 59500
},
{
"ar_loss": 0.3726,
"epoch": 9.013066871637202,
"fm_loss": 0.1579,
"grad_norm": 3.087045907974243,
"learning_rate": 8.630471002862795e-07,
"loss": 0.5305,
"step": 59600
},
{
"ar_loss": 0.373,
"epoch": 9.028439661798616,
"fm_loss": 0.1594,
"grad_norm": 2.6808998584747314,
"learning_rate": 8.318471013574442e-07,
"loss": 0.5324,
"step": 59700
},
{
"ar_loss": 0.3725,
"epoch": 9.043812451960031,
"fm_loss": 0.1586,
"grad_norm": 2.7164993286132812,
"learning_rate": 8.01211947675945e-07,
"loss": 0.5311,
"step": 59800
},
{
"ar_loss": 0.3736,
"epoch": 9.059185242121446,
"fm_loss": 0.1582,
"grad_norm": 3.879828929901123,
"learning_rate": 7.711423552303366e-07,
"loss": 0.5318,
"step": 59900
},
{
"ar_loss": 0.3727,
"epoch": 9.074558032282859,
"fm_loss": 0.1574,
"grad_norm": 2.1200876235961914,
"learning_rate": 7.416390267911827e-07,
"loss": 0.5301,
"step": 60000
}
],
"logging_steps": 100,
"max_steps": 65050,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9.656055098925268e+20,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}