| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.074558032282859, | |
| "eval_steps": 500, | |
| "global_step": 60000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "ar_loss": 0.7918, | |
| "epoch": 0.015039855617386072, | |
| "fm_loss": 2.2779, | |
| "grad_norm": 4.37157678604126, | |
| "learning_rate": 4.9999971366617235e-05, | |
| "loss": 3.0697, | |
| "step": 100 | |
| }, | |
| { | |
| "ar_loss": 0.6852, | |
| "epoch": 0.030079711234772145, | |
| "fm_loss": 0.5703, | |
| "grad_norm": 4.328033447265625, | |
| "learning_rate": 4.999951278658594e-05, | |
| "loss": 1.2555, | |
| "step": 200 | |
| }, | |
| { | |
| "ar_loss": 0.669, | |
| "epoch": 0.04511956685215822, | |
| "fm_loss": 0.3866, | |
| "grad_norm": 3.0744214057922363, | |
| "learning_rate": 4.9998494972632174e-05, | |
| "loss": 1.0556, | |
| "step": 300 | |
| }, | |
| { | |
| "ar_loss": 0.6594, | |
| "epoch": 0.06015942246954429, | |
| "fm_loss": 0.319, | |
| "grad_norm": 2.9869937896728516, | |
| "learning_rate": 4.9996917947524234e-05, | |
| "loss": 0.9783, | |
| "step": 400 | |
| }, | |
| { | |
| "ar_loss": 0.6539, | |
| "epoch": 0.07519927808693036, | |
| "fm_loss": 0.2901, | |
| "grad_norm": 2.676532745361328, | |
| "learning_rate": 4.999478174653984e-05, | |
| "loss": 0.9441, | |
| "step": 500 | |
| }, | |
| { | |
| "ar_loss": 0.6451, | |
| "epoch": 0.09023913370431644, | |
| "fm_loss": 0.271, | |
| "grad_norm": 2.8496103286743164, | |
| "learning_rate": 4.999208641746537e-05, | |
| "loss": 0.9161, | |
| "step": 600 | |
| }, | |
| { | |
| "ar_loss": 0.639, | |
| "epoch": 0.10527898932170252, | |
| "fm_loss": 0.262, | |
| "grad_norm": 3.5984983444213867, | |
| "learning_rate": 4.998883202059478e-05, | |
| "loss": 0.901, | |
| "step": 700 | |
| }, | |
| { | |
| "ar_loss": 0.6368, | |
| "epoch": 0.12031884493908858, | |
| "fm_loss": 0.251, | |
| "grad_norm": 3.3547682762145996, | |
| "learning_rate": 4.998501862872824e-05, | |
| "loss": 0.8878, | |
| "step": 800 | |
| }, | |
| { | |
| "ar_loss": 0.6325, | |
| "epoch": 0.13535870055647467, | |
| "fm_loss": 0.2422, | |
| "grad_norm": 3.7177250385284424, | |
| "learning_rate": 4.998064632717054e-05, | |
| "loss": 0.8747, | |
| "step": 900 | |
| }, | |
| { | |
| "ar_loss": 0.6295, | |
| "epoch": 0.15039855617386072, | |
| "fm_loss": 0.2364, | |
| "grad_norm": 2.8715012073516846, | |
| "learning_rate": 4.997571521372918e-05, | |
| "loss": 0.8659, | |
| "step": 1000 | |
| }, | |
| { | |
| "ar_loss": 0.6261, | |
| "epoch": 0.1654384117912468, | |
| "fm_loss": 0.233, | |
| "grad_norm": 2.686033248901367, | |
| "learning_rate": 4.9970225398712116e-05, | |
| "loss": 0.8591, | |
| "step": 1100 | |
| }, | |
| { | |
| "ar_loss": 0.6227, | |
| "epoch": 0.18047826740863288, | |
| "fm_loss": 0.2337, | |
| "grad_norm": 2.735515594482422, | |
| "learning_rate": 4.99641770049254e-05, | |
| "loss": 0.8564, | |
| "step": 1200 | |
| }, | |
| { | |
| "ar_loss": 0.6288, | |
| "epoch": 0.19551812302601895, | |
| "fm_loss": 0.2343, | |
| "grad_norm": 3.192143440246582, | |
| "learning_rate": 4.995757016767032e-05, | |
| "loss": 0.8631, | |
| "step": 1300 | |
| }, | |
| { | |
| "ar_loss": 0.6179, | |
| "epoch": 0.21055797864340503, | |
| "fm_loss": 0.2254, | |
| "grad_norm": 4.529001235961914, | |
| "learning_rate": 4.995040503474049e-05, | |
| "loss": 0.8433, | |
| "step": 1400 | |
| }, | |
| { | |
| "ar_loss": 0.6117, | |
| "epoch": 0.2255978342607911, | |
| "fm_loss": 0.2265, | |
| "grad_norm": 2.0048255920410156, | |
| "learning_rate": 4.994268176641842e-05, | |
| "loss": 0.8382, | |
| "step": 1500 | |
| }, | |
| { | |
| "ar_loss": 0.6197, | |
| "epoch": 0.24063768987817716, | |
| "fm_loss": 0.2174, | |
| "grad_norm": 2.0079429149627686, | |
| "learning_rate": 4.993440053547204e-05, | |
| "loss": 0.8372, | |
| "step": 1600 | |
| }, | |
| { | |
| "ar_loss": 0.6073, | |
| "epoch": 0.25567754549556326, | |
| "fm_loss": 0.2158, | |
| "grad_norm": 2.0703108310699463, | |
| "learning_rate": 4.992556152715076e-05, | |
| "loss": 0.8231, | |
| "step": 1700 | |
| }, | |
| { | |
| "ar_loss": 0.6091, | |
| "epoch": 0.27071740111294934, | |
| "fm_loss": 0.2121, | |
| "grad_norm": 2.3248846530914307, | |
| "learning_rate": 4.991616493918137e-05, | |
| "loss": 0.8213, | |
| "step": 1800 | |
| }, | |
| { | |
| "ar_loss": 0.608, | |
| "epoch": 0.28575725673033536, | |
| "fm_loss": 0.2088, | |
| "grad_norm": 2.5417592525482178, | |
| "learning_rate": 4.99062109817636e-05, | |
| "loss": 0.8167, | |
| "step": 1900 | |
| }, | |
| { | |
| "ar_loss": 0.6097, | |
| "epoch": 0.30079711234772144, | |
| "fm_loss": 0.21, | |
| "grad_norm": 2.219682216644287, | |
| "learning_rate": 4.989569987756542e-05, | |
| "loss": 0.8196, | |
| "step": 2000 | |
| }, | |
| { | |
| "ar_loss": 0.6056, | |
| "epoch": 0.3158369679651075, | |
| "fm_loss": 0.2079, | |
| "grad_norm": 1.9187816381454468, | |
| "learning_rate": 4.988463186171804e-05, | |
| "loss": 0.8136, | |
| "step": 2100 | |
| }, | |
| { | |
| "ar_loss": 0.6064, | |
| "epoch": 0.3308768235824936, | |
| "fm_loss": 0.2072, | |
| "grad_norm": 2.246711492538452, | |
| "learning_rate": 4.987300718181068e-05, | |
| "loss": 0.8136, | |
| "step": 2200 | |
| }, | |
| { | |
| "ar_loss": 0.6015, | |
| "epoch": 0.3459166791998797, | |
| "fm_loss": 0.2044, | |
| "grad_norm": 1.855542540550232, | |
| "learning_rate": 4.986082609788504e-05, | |
| "loss": 0.8059, | |
| "step": 2300 | |
| }, | |
| { | |
| "ar_loss": 0.6, | |
| "epoch": 0.36095653481726575, | |
| "fm_loss": 0.2081, | |
| "grad_norm": 1.883419156074524, | |
| "learning_rate": 4.9848088882429426e-05, | |
| "loss": 0.8082, | |
| "step": 2400 | |
| }, | |
| { | |
| "ar_loss": 0.6048, | |
| "epoch": 0.37599639043465183, | |
| "fm_loss": 0.201, | |
| "grad_norm": 4.141597747802734, | |
| "learning_rate": 4.983479582037272e-05, | |
| "loss": 0.8058, | |
| "step": 2500 | |
| }, | |
| { | |
| "ar_loss": 0.6025, | |
| "epoch": 0.3910362460520379, | |
| "fm_loss": 0.2012, | |
| "grad_norm": 1.3507440090179443, | |
| "learning_rate": 4.9820947209077965e-05, | |
| "loss": 0.8037, | |
| "step": 2600 | |
| }, | |
| { | |
| "ar_loss": 0.5966, | |
| "epoch": 0.406076101669424, | |
| "fm_loss": 0.2103, | |
| "grad_norm": 1.5875388383865356, | |
| "learning_rate": 4.980654335833572e-05, | |
| "loss": 0.8069, | |
| "step": 2700 | |
| }, | |
| { | |
| "ar_loss": 0.5995, | |
| "epoch": 0.42111595728681006, | |
| "fm_loss": 0.1975, | |
| "grad_norm": 2.4850363731384277, | |
| "learning_rate": 4.979158459035715e-05, | |
| "loss": 0.7971, | |
| "step": 2800 | |
| }, | |
| { | |
| "ar_loss": 0.595, | |
| "epoch": 0.43615581290419614, | |
| "fm_loss": 0.1981, | |
| "grad_norm": 2.0931544303894043, | |
| "learning_rate": 4.97760712397668e-05, | |
| "loss": 0.7931, | |
| "step": 2900 | |
| }, | |
| { | |
| "ar_loss": 0.5959, | |
| "epoch": 0.4511956685215822, | |
| "fm_loss": 0.1984, | |
| "grad_norm": 2.2044456005096436, | |
| "learning_rate": 4.97600036535951e-05, | |
| "loss": 0.7943, | |
| "step": 3000 | |
| }, | |
| { | |
| "ar_loss": 0.5894, | |
| "epoch": 0.4662355241389683, | |
| "fm_loss": 0.197, | |
| "grad_norm": 2.905266046524048, | |
| "learning_rate": 4.974338219127062e-05, | |
| "loss": 0.7863, | |
| "step": 3100 | |
| }, | |
| { | |
| "ar_loss": 0.5851, | |
| "epoch": 0.4812753797563543, | |
| "fm_loss": 0.1953, | |
| "grad_norm": 2.5252127647399902, | |
| "learning_rate": 4.9726207224612034e-05, | |
| "loss": 0.7805, | |
| "step": 3200 | |
| }, | |
| { | |
| "ar_loss": 0.5936, | |
| "epoch": 0.4963152353737404, | |
| "fm_loss": 0.1957, | |
| "grad_norm": 2.7935545444488525, | |
| "learning_rate": 4.97084791378198e-05, | |
| "loss": 0.7893, | |
| "step": 3300 | |
| }, | |
| { | |
| "ar_loss": 0.5869, | |
| "epoch": 0.5113550909911265, | |
| "fm_loss": 0.1933, | |
| "grad_norm": 1.2401388883590698, | |
| "learning_rate": 4.9690198327467534e-05, | |
| "loss": 0.7802, | |
| "step": 3400 | |
| }, | |
| { | |
| "ar_loss": 0.5888, | |
| "epoch": 0.5263949466085126, | |
| "fm_loss": 0.1944, | |
| "grad_norm": 3.7216362953186035, | |
| "learning_rate": 4.967136520249318e-05, | |
| "loss": 0.7832, | |
| "step": 3500 | |
| }, | |
| { | |
| "ar_loss": 0.587, | |
| "epoch": 0.5414348022258987, | |
| "fm_loss": 0.1916, | |
| "grad_norm": 1.946953296661377, | |
| "learning_rate": 4.965198018418985e-05, | |
| "loss": 0.7786, | |
| "step": 3600 | |
| }, | |
| { | |
| "ar_loss": 0.5828, | |
| "epoch": 0.5564746578432848, | |
| "fm_loss": 0.1921, | |
| "grad_norm": 1.695999264717102, | |
| "learning_rate": 4.963204370619637e-05, | |
| "loss": 0.7749, | |
| "step": 3700 | |
| }, | |
| { | |
| "ar_loss": 0.5832, | |
| "epoch": 0.5715145134606707, | |
| "fm_loss": 0.1973, | |
| "grad_norm": 2.257406234741211, | |
| "learning_rate": 4.9611556214487645e-05, | |
| "loss": 0.7805, | |
| "step": 3800 | |
| }, | |
| { | |
| "ar_loss": 0.5825, | |
| "epoch": 0.5865543690780568, | |
| "fm_loss": 0.1915, | |
| "grad_norm": 2.4373984336853027, | |
| "learning_rate": 4.95905181673646e-05, | |
| "loss": 0.7739, | |
| "step": 3900 | |
| }, | |
| { | |
| "ar_loss": 0.5809, | |
| "epoch": 0.6015942246954429, | |
| "fm_loss": 0.1924, | |
| "grad_norm": 1.4836270809173584, | |
| "learning_rate": 4.956893003544401e-05, | |
| "loss": 0.7733, | |
| "step": 4000 | |
| }, | |
| { | |
| "ar_loss": 0.5801, | |
| "epoch": 0.616634080312829, | |
| "fm_loss": 0.1956, | |
| "grad_norm": 1.864709734916687, | |
| "learning_rate": 4.954679230164789e-05, | |
| "loss": 0.7757, | |
| "step": 4100 | |
| }, | |
| { | |
| "ar_loss": 0.5824, | |
| "epoch": 0.631673935930215, | |
| "fm_loss": 0.1895, | |
| "grad_norm": 1.5477041006088257, | |
| "learning_rate": 4.952410546119278e-05, | |
| "loss": 0.7719, | |
| "step": 4200 | |
| }, | |
| { | |
| "ar_loss": 0.5819, | |
| "epoch": 0.6467137915476011, | |
| "fm_loss": 0.1925, | |
| "grad_norm": 1.6031211614608765, | |
| "learning_rate": 4.95008700215786e-05, | |
| "loss": 0.7744, | |
| "step": 4300 | |
| }, | |
| { | |
| "ar_loss": 0.5778, | |
| "epoch": 0.6617536471649872, | |
| "fm_loss": 0.2067, | |
| "grad_norm": 5.476424694061279, | |
| "learning_rate": 4.947708650257732e-05, | |
| "loss": 0.7845, | |
| "step": 4400 | |
| }, | |
| { | |
| "ar_loss": 0.5767, | |
| "epoch": 0.6767935027823733, | |
| "fm_loss": 0.1875, | |
| "grad_norm": 1.901498794555664, | |
| "learning_rate": 4.945275543622133e-05, | |
| "loss": 0.7642, | |
| "step": 4500 | |
| }, | |
| { | |
| "ar_loss": 0.5766, | |
| "epoch": 0.6918333583997593, | |
| "fm_loss": 0.1912, | |
| "grad_norm": 1.4781795740127563, | |
| "learning_rate": 4.942787736679153e-05, | |
| "loss": 0.7678, | |
| "step": 4600 | |
| }, | |
| { | |
| "ar_loss": 0.5735, | |
| "epoch": 0.7068732140171454, | |
| "fm_loss": 0.1893, | |
| "grad_norm": 2.639249801635742, | |
| "learning_rate": 4.940245285080521e-05, | |
| "loss": 0.7628, | |
| "step": 4700 | |
| }, | |
| { | |
| "ar_loss": 0.5736, | |
| "epoch": 0.7219130696345315, | |
| "fm_loss": 0.1854, | |
| "grad_norm": 1.5374678373336792, | |
| "learning_rate": 4.93764824570035e-05, | |
| "loss": 0.759, | |
| "step": 4800 | |
| }, | |
| { | |
| "ar_loss": 0.5755, | |
| "epoch": 0.7369529252519176, | |
| "fm_loss": 0.187, | |
| "grad_norm": 1.971647024154663, | |
| "learning_rate": 4.934996676633874e-05, | |
| "loss": 0.7625, | |
| "step": 4900 | |
| }, | |
| { | |
| "ar_loss": 0.5725, | |
| "epoch": 0.7519927808693037, | |
| "fm_loss": 0.1875, | |
| "grad_norm": 5.200182914733887, | |
| "learning_rate": 4.932290637196144e-05, | |
| "loss": 0.76, | |
| "step": 5000 | |
| }, | |
| { | |
| "ar_loss": 0.5504, | |
| "epoch": 0.015037593984962405, | |
| "fm_loss": 0.1893, | |
| "grad_norm": 2.8439042568206787, | |
| "learning_rate": 4.929551301429187e-05, | |
| "loss": 0.7396, | |
| "step": 5100 | |
| }, | |
| { | |
| "ar_loss": 0.5503, | |
| "epoch": 0.03007518796992481, | |
| "fm_loss": 0.1831, | |
| "grad_norm": 1.733323335647583, | |
| "learning_rate": 4.926737343223295e-05, | |
| "loss": 0.7334, | |
| "step": 5200 | |
| }, | |
| { | |
| "ar_loss": 0.5514, | |
| "epoch": 0.045112781954887216, | |
| "fm_loss": 0.185, | |
| "grad_norm": 2.580357074737549, | |
| "learning_rate": 4.9238691157474316e-05, | |
| "loss": 0.7364, | |
| "step": 5300 | |
| }, | |
| { | |
| "ar_loss": 0.5493, | |
| "epoch": 0.06015037593984962, | |
| "fm_loss": 0.1835, | |
| "grad_norm": 1.9058268070220947, | |
| "learning_rate": 4.920946683143935e-05, | |
| "loss": 0.7327, | |
| "step": 5400 | |
| }, | |
| { | |
| "ar_loss": 0.5497, | |
| "epoch": 0.07518796992481203, | |
| "fm_loss": 0.184, | |
| "grad_norm": 1.3233968019485474, | |
| "learning_rate": 4.91797011076734e-05, | |
| "loss": 0.7337, | |
| "step": 5500 | |
| }, | |
| { | |
| "ar_loss": 0.5466, | |
| "epoch": 0.09022556390977443, | |
| "fm_loss": 0.1829, | |
| "grad_norm": 1.8950995206832886, | |
| "learning_rate": 4.9149394651829086e-05, | |
| "loss": 0.7295, | |
| "step": 5600 | |
| }, | |
| { | |
| "ar_loss": 0.5495, | |
| "epoch": 0.10526315789473684, | |
| "fm_loss": 0.1829, | |
| "grad_norm": 1.7234010696411133, | |
| "learning_rate": 4.911854814165145e-05, | |
| "loss": 0.7324, | |
| "step": 5700 | |
| }, | |
| { | |
| "ar_loss": 0.5475, | |
| "epoch": 0.12030075187969924, | |
| "fm_loss": 0.1821, | |
| "grad_norm": 2.068098783493042, | |
| "learning_rate": 4.908716226696284e-05, | |
| "loss": 0.7295, | |
| "step": 5800 | |
| }, | |
| { | |
| "ar_loss": 0.5457, | |
| "epoch": 0.13533834586466165, | |
| "fm_loss": 0.1837, | |
| "grad_norm": 1.4124906063079834, | |
| "learning_rate": 4.905523772964739e-05, | |
| "loss": 0.7294, | |
| "step": 5900 | |
| }, | |
| { | |
| "ar_loss": 0.5504, | |
| "epoch": 0.15037593984962405, | |
| "fm_loss": 0.1828, | |
| "grad_norm": 1.7072114944458008, | |
| "learning_rate": 4.902277524363543e-05, | |
| "loss": 0.7332, | |
| "step": 6000 | |
| }, | |
| { | |
| "ar_loss": 0.5454, | |
| "epoch": 0.16541353383458646, | |
| "fm_loss": 0.181, | |
| "grad_norm": 1.737274169921875, | |
| "learning_rate": 4.898977553488743e-05, | |
| "loss": 0.7264, | |
| "step": 6100 | |
| }, | |
| { | |
| "ar_loss": 0.5501, | |
| "epoch": 0.18045112781954886, | |
| "fm_loss": 0.1808, | |
| "grad_norm": 2.1727452278137207, | |
| "learning_rate": 4.895623934137783e-05, | |
| "loss": 0.7309, | |
| "step": 6200 | |
| }, | |
| { | |
| "ar_loss": 0.5439, | |
| "epoch": 0.19548872180451127, | |
| "fm_loss": 0.1798, | |
| "grad_norm": 2.8658437728881836, | |
| "learning_rate": 4.892216741307848e-05, | |
| "loss": 0.7237, | |
| "step": 6300 | |
| }, | |
| { | |
| "ar_loss": 0.5438, | |
| "epoch": 0.21052631578947367, | |
| "fm_loss": 0.1812, | |
| "grad_norm": 1.8495811223983765, | |
| "learning_rate": 4.888756051194193e-05, | |
| "loss": 0.725, | |
| "step": 6400 | |
| }, | |
| { | |
| "ar_loss": 0.5391, | |
| "epoch": 0.22556390977443608, | |
| "fm_loss": 0.1894, | |
| "grad_norm": 2.1281821727752686, | |
| "learning_rate": 4.885241941188435e-05, | |
| "loss": 0.7286, | |
| "step": 6500 | |
| }, | |
| { | |
| "ar_loss": 0.5437, | |
| "epoch": 0.24060150375939848, | |
| "fm_loss": 0.1787, | |
| "grad_norm": 1.7707561254501343, | |
| "learning_rate": 4.881674489876822e-05, | |
| "loss": 0.7224, | |
| "step": 6600 | |
| }, | |
| { | |
| "ar_loss": 0.5413, | |
| "epoch": 0.2556390977443609, | |
| "fm_loss": 0.1795, | |
| "grad_norm": 1.4524445533752441, | |
| "learning_rate": 4.878053777038478e-05, | |
| "loss": 0.7208, | |
| "step": 6700 | |
| }, | |
| { | |
| "ar_loss": 0.5411, | |
| "epoch": 0.2706766917293233, | |
| "fm_loss": 0.18, | |
| "grad_norm": 2.2196991443634033, | |
| "learning_rate": 4.8743798836436166e-05, | |
| "loss": 0.7212, | |
| "step": 6800 | |
| }, | |
| { | |
| "ar_loss": 0.5395, | |
| "epoch": 0.2857142857142857, | |
| "fm_loss": 0.1778, | |
| "grad_norm": 3.7135586738586426, | |
| "learning_rate": 4.8706528918517326e-05, | |
| "loss": 0.7173, | |
| "step": 6900 | |
| }, | |
| { | |
| "ar_loss": 0.5397, | |
| "epoch": 0.3007518796992481, | |
| "fm_loss": 0.179, | |
| "grad_norm": 2.551405191421509, | |
| "learning_rate": 4.866872885009762e-05, | |
| "loss": 0.7188, | |
| "step": 7000 | |
| }, | |
| { | |
| "ar_loss": 0.5433, | |
| "epoch": 0.3157894736842105, | |
| "fm_loss": 0.1769, | |
| "grad_norm": 1.5937398672103882, | |
| "learning_rate": 4.863039947650221e-05, | |
| "loss": 0.7202, | |
| "step": 7100 | |
| }, | |
| { | |
| "ar_loss": 0.537, | |
| "epoch": 0.3308270676691729, | |
| "fm_loss": 0.1778, | |
| "grad_norm": 2.1410443782806396, | |
| "learning_rate": 4.859154165489313e-05, | |
| "loss": 0.7148, | |
| "step": 7200 | |
| }, | |
| { | |
| "ar_loss": 0.5345, | |
| "epoch": 0.3458646616541353, | |
| "fm_loss": 0.1786, | |
| "grad_norm": 1.9549566507339478, | |
| "learning_rate": 4.855215625425012e-05, | |
| "loss": 0.7131, | |
| "step": 7300 | |
| }, | |
| { | |
| "ar_loss": 0.5385, | |
| "epoch": 0.3609022556390977, | |
| "fm_loss": 0.1786, | |
| "grad_norm": 1.6920804977416992, | |
| "learning_rate": 4.851224415535123e-05, | |
| "loss": 0.7171, | |
| "step": 7400 | |
| }, | |
| { | |
| "ar_loss": 0.5403, | |
| "epoch": 0.37593984962406013, | |
| "fm_loss": 0.1778, | |
| "grad_norm": 1.460734248161316, | |
| "learning_rate": 4.847180625075306e-05, | |
| "loss": 0.7182, | |
| "step": 7500 | |
| }, | |
| { | |
| "ar_loss": 0.5333, | |
| "epoch": 0.39097744360902253, | |
| "fm_loss": 0.1802, | |
| "grad_norm": 2.369119644165039, | |
| "learning_rate": 4.8430843444770856e-05, | |
| "loss": 0.7135, | |
| "step": 7600 | |
| }, | |
| { | |
| "ar_loss": 0.5343, | |
| "epoch": 0.40601503759398494, | |
| "fm_loss": 0.1777, | |
| "grad_norm": 2.0863733291625977, | |
| "learning_rate": 4.838935665345826e-05, | |
| "loss": 0.712, | |
| "step": 7700 | |
| }, | |
| { | |
| "ar_loss": 0.5361, | |
| "epoch": 0.42105263157894735, | |
| "fm_loss": 0.1826, | |
| "grad_norm": 2.1648268699645996, | |
| "learning_rate": 4.834734680458682e-05, | |
| "loss": 0.7187, | |
| "step": 7800 | |
| }, | |
| { | |
| "ar_loss": 0.5362, | |
| "epoch": 0.43609022556390975, | |
| "fm_loss": 0.1759, | |
| "grad_norm": 3.731491804122925, | |
| "learning_rate": 4.8304814837625275e-05, | |
| "loss": 0.7121, | |
| "step": 7900 | |
| }, | |
| { | |
| "ar_loss": 0.5361, | |
| "epoch": 0.45112781954887216, | |
| "fm_loss": 0.1763, | |
| "grad_norm": 3.129291296005249, | |
| "learning_rate": 4.826176170371848e-05, | |
| "loss": 0.7124, | |
| "step": 8000 | |
| }, | |
| { | |
| "ar_loss": 0.5308, | |
| "epoch": 0.46616541353383456, | |
| "fm_loss": 0.1803, | |
| "grad_norm": 1.837388038635254, | |
| "learning_rate": 4.8218188365666216e-05, | |
| "loss": 0.7111, | |
| "step": 8100 | |
| }, | |
| { | |
| "ar_loss": 0.5384, | |
| "epoch": 0.48120300751879697, | |
| "fm_loss": 0.1765, | |
| "grad_norm": 1.696321964263916, | |
| "learning_rate": 4.817409579790161e-05, | |
| "loss": 0.7148, | |
| "step": 8200 | |
| }, | |
| { | |
| "ar_loss": 0.5351, | |
| "epoch": 0.49624060150375937, | |
| "fm_loss": 0.177, | |
| "grad_norm": 2.0662057399749756, | |
| "learning_rate": 4.812948498646933e-05, | |
| "loss": 0.7121, | |
| "step": 8300 | |
| }, | |
| { | |
| "ar_loss": 0.5358, | |
| "epoch": 0.5112781954887218, | |
| "fm_loss": 0.1757, | |
| "grad_norm": 1.4204461574554443, | |
| "learning_rate": 4.80843569290036e-05, | |
| "loss": 0.7114, | |
| "step": 8400 | |
| }, | |
| { | |
| "ar_loss": 0.5316, | |
| "epoch": 0.5263157894736842, | |
| "fm_loss": 0.176, | |
| "grad_norm": 3.6314117908477783, | |
| "learning_rate": 4.80387126347058e-05, | |
| "loss": 0.7075, | |
| "step": 8500 | |
| }, | |
| { | |
| "ar_loss": 0.5307, | |
| "epoch": 0.5413533834586466, | |
| "fm_loss": 0.1754, | |
| "grad_norm": 2.164659023284912, | |
| "learning_rate": 4.799255312432199e-05, | |
| "loss": 0.706, | |
| "step": 8600 | |
| }, | |
| { | |
| "ar_loss": 0.5331, | |
| "epoch": 0.556390977443609, | |
| "fm_loss": 0.184, | |
| "grad_norm": 1.9370899200439453, | |
| "learning_rate": 4.794587943012e-05, | |
| "loss": 0.7171, | |
| "step": 8700 | |
| }, | |
| { | |
| "ar_loss": 0.5278, | |
| "epoch": 0.5714285714285714, | |
| "fm_loss": 0.1738, | |
| "grad_norm": 2.0422327518463135, | |
| "learning_rate": 4.7898692595866415e-05, | |
| "loss": 0.7016, | |
| "step": 8800 | |
| }, | |
| { | |
| "ar_loss": 0.5299, | |
| "epoch": 0.5864661654135338, | |
| "fm_loss": 0.1729, | |
| "grad_norm": 1.643233060836792, | |
| "learning_rate": 4.785099367680317e-05, | |
| "loss": 0.7028, | |
| "step": 8900 | |
| }, | |
| { | |
| "ar_loss": 0.5279, | |
| "epoch": 0.6015037593984962, | |
| "fm_loss": 0.173, | |
| "grad_norm": 1.7165354490280151, | |
| "learning_rate": 4.7802783739624e-05, | |
| "loss": 0.7009, | |
| "step": 9000 | |
| }, | |
| { | |
| "ar_loss": 0.5297, | |
| "epoch": 0.6165413533834586, | |
| "fm_loss": 0.181, | |
| "grad_norm": 1.632948875427246, | |
| "learning_rate": 4.7754063862450576e-05, | |
| "loss": 0.7107, | |
| "step": 9100 | |
| }, | |
| { | |
| "ar_loss": 0.5266, | |
| "epoch": 0.631578947368421, | |
| "fm_loss": 0.2008, | |
| "grad_norm": 2.7003791332244873, | |
| "learning_rate": 4.770483513480837e-05, | |
| "loss": 0.7274, | |
| "step": 9200 | |
| }, | |
| { | |
| "ar_loss": 0.5304, | |
| "epoch": 0.6466165413533834, | |
| "fm_loss": 0.1754, | |
| "grad_norm": 2.2766501903533936, | |
| "learning_rate": 4.765509865760233e-05, | |
| "loss": 0.7057, | |
| "step": 9300 | |
| }, | |
| { | |
| "ar_loss": 0.5262, | |
| "epoch": 0.6616541353383458, | |
| "fm_loss": 0.18, | |
| "grad_norm": 10.034100532531738, | |
| "learning_rate": 4.760485554309219e-05, | |
| "loss": 0.7061, | |
| "step": 9400 | |
| }, | |
| { | |
| "ar_loss": 0.52, | |
| "epoch": 0.6766917293233082, | |
| "fm_loss": 0.1753, | |
| "grad_norm": 1.70254385471344, | |
| "learning_rate": 4.7554106914867705e-05, | |
| "loss": 0.6953, | |
| "step": 9500 | |
| }, | |
| { | |
| "ar_loss": 0.5249, | |
| "epoch": 0.6917293233082706, | |
| "fm_loss": 0.1744, | |
| "grad_norm": 1.7009040117263794, | |
| "learning_rate": 4.750285390782342e-05, | |
| "loss": 0.6992, | |
| "step": 9600 | |
| }, | |
| { | |
| "ar_loss": 0.523, | |
| "epoch": 0.706766917293233, | |
| "fm_loss": 0.1739, | |
| "grad_norm": 1.4413669109344482, | |
| "learning_rate": 4.745109766813334e-05, | |
| "loss": 0.6968, | |
| "step": 9700 | |
| }, | |
| { | |
| "ar_loss": 0.5242, | |
| "epoch": 0.7218045112781954, | |
| "fm_loss": 0.1727, | |
| "grad_norm": 1.4672104120254517, | |
| "learning_rate": 4.739883935322532e-05, | |
| "loss": 0.6969, | |
| "step": 9800 | |
| }, | |
| { | |
| "ar_loss": 0.5244, | |
| "epoch": 0.7368421052631579, | |
| "fm_loss": 0.1728, | |
| "grad_norm": 1.458977222442627, | |
| "learning_rate": 4.734608013175512e-05, | |
| "loss": 0.6972, | |
| "step": 9900 | |
| }, | |
| { | |
| "ar_loss": 0.5247, | |
| "epoch": 0.7518796992481203, | |
| "fm_loss": 0.1721, | |
| "grad_norm": 2.412842273712158, | |
| "learning_rate": 4.72928211835803e-05, | |
| "loss": 0.6968, | |
| "step": 10000 | |
| }, | |
| { | |
| "ar_loss": 0.5206, | |
| "epoch": 0.7669172932330827, | |
| "fm_loss": 0.172, | |
| "grad_norm": 2.1207447052001953, | |
| "learning_rate": 4.723906369973386e-05, | |
| "loss": 0.6926, | |
| "step": 10100 | |
| }, | |
| { | |
| "ar_loss": 0.5214, | |
| "epoch": 0.7819548872180451, | |
| "fm_loss": 0.1735, | |
| "grad_norm": 2.6220345497131348, | |
| "learning_rate": 4.7184808882397594e-05, | |
| "loss": 0.695, | |
| "step": 10200 | |
| }, | |
| { | |
| "ar_loss": 0.5236, | |
| "epoch": 0.7969924812030075, | |
| "fm_loss": 0.1819, | |
| "grad_norm": 1.9105595350265503, | |
| "learning_rate": 4.713005794487515e-05, | |
| "loss": 0.7055, | |
| "step": 10300 | |
| }, | |
| { | |
| "ar_loss": 0.5202, | |
| "epoch": 0.8120300751879699, | |
| "fm_loss": 0.1718, | |
| "grad_norm": 2.312239646911621, | |
| "learning_rate": 4.707481211156497e-05, | |
| "loss": 0.692, | |
| "step": 10400 | |
| }, | |
| { | |
| "ar_loss": 0.5207, | |
| "epoch": 0.8270676691729323, | |
| "fm_loss": 0.1788, | |
| "grad_norm": 1.709987759590149, | |
| "learning_rate": 4.701907261793287e-05, | |
| "loss": 0.6995, | |
| "step": 10500 | |
| }, | |
| { | |
| "ar_loss": 0.5178, | |
| "epoch": 0.8421052631578947, | |
| "fm_loss": 0.172, | |
| "grad_norm": 2.644814968109131, | |
| "learning_rate": 4.696284071048444e-05, | |
| "loss": 0.6898, | |
| "step": 10600 | |
| }, | |
| { | |
| "ar_loss": 0.5175, | |
| "epoch": 0.8571428571428571, | |
| "fm_loss": 0.1737, | |
| "grad_norm": 1.4387990236282349, | |
| "learning_rate": 4.690611764673713e-05, | |
| "loss": 0.6912, | |
| "step": 10700 | |
| }, | |
| { | |
| "ar_loss": 0.5154, | |
| "epoch": 0.8721804511278195, | |
| "fm_loss": 0.1727, | |
| "grad_norm": 1.308214545249939, | |
| "learning_rate": 4.684890469519213e-05, | |
| "loss": 0.6881, | |
| "step": 10800 | |
| }, | |
| { | |
| "ar_loss": 0.5158, | |
| "epoch": 0.8872180451127819, | |
| "fm_loss": 0.1707, | |
| "grad_norm": 3.754106283187866, | |
| "learning_rate": 4.6791203135306075e-05, | |
| "loss": 0.6865, | |
| "step": 10900 | |
| }, | |
| { | |
| "ar_loss": 0.5213, | |
| "epoch": 0.9022556390977443, | |
| "fm_loss": 0.1709, | |
| "grad_norm": 1.5025776624679565, | |
| "learning_rate": 4.673301425746232e-05, | |
| "loss": 0.6922, | |
| "step": 11000 | |
| }, | |
| { | |
| "ar_loss": 0.5129, | |
| "epoch": 0.9172932330827067, | |
| "fm_loss": 0.1705, | |
| "grad_norm": 1.8450067043304443, | |
| "learning_rate": 4.667433936294217e-05, | |
| "loss": 0.6835, | |
| "step": 11100 | |
| }, | |
| { | |
| "ar_loss": 0.5176, | |
| "epoch": 0.9323308270676691, | |
| "fm_loss": 0.1768, | |
| "grad_norm": 1.772120475769043, | |
| "learning_rate": 4.661517976389574e-05, | |
| "loss": 0.6944, | |
| "step": 11200 | |
| }, | |
| { | |
| "ar_loss": 0.5117, | |
| "epoch": 0.9473684210526315, | |
| "fm_loss": 0.1722, | |
| "grad_norm": 1.8486195802688599, | |
| "learning_rate": 4.6555536783312634e-05, | |
| "loss": 0.6839, | |
| "step": 11300 | |
| }, | |
| { | |
| "ar_loss": 0.5139, | |
| "epoch": 0.9624060150375939, | |
| "fm_loss": 0.1705, | |
| "grad_norm": 3.964401960372925, | |
| "learning_rate": 4.649541175499232e-05, | |
| "loss": 0.6844, | |
| "step": 11400 | |
| }, | |
| { | |
| "ar_loss": 0.5154, | |
| "epoch": 0.9774436090225563, | |
| "fm_loss": 0.1923, | |
| "grad_norm": 2.2421326637268066, | |
| "learning_rate": 4.6434806023514354e-05, | |
| "loss": 0.7077, | |
| "step": 11500 | |
| }, | |
| { | |
| "ar_loss": 0.5147, | |
| "epoch": 0.9924812030075187, | |
| "fm_loss": 0.171, | |
| "grad_norm": 1.773090124130249, | |
| "learning_rate": 4.6373720944208275e-05, | |
| "loss": 0.6856, | |
| "step": 11600 | |
| }, | |
| { | |
| "ar_loss": 0.4967, | |
| "epoch": 1.0075187969924813, | |
| "fm_loss": 0.1708, | |
| "grad_norm": 2.0621070861816406, | |
| "learning_rate": 4.631215788312331e-05, | |
| "loss": 0.6675, | |
| "step": 11700 | |
| }, | |
| { | |
| "ar_loss": 0.4895, | |
| "epoch": 1.0225563909774436, | |
| "fm_loss": 0.1706, | |
| "grad_norm": 1.7303022146224976, | |
| "learning_rate": 4.6250118216997795e-05, | |
| "loss": 0.6601, | |
| "step": 11800 | |
| }, | |
| { | |
| "ar_loss": 0.4896, | |
| "epoch": 1.037593984962406, | |
| "fm_loss": 0.1688, | |
| "grad_norm": 2.481395959854126, | |
| "learning_rate": 4.618760333322846e-05, | |
| "loss": 0.6584, | |
| "step": 11900 | |
| }, | |
| { | |
| "ar_loss": 0.484, | |
| "epoch": 1.0526315789473684, | |
| "fm_loss": 0.1686, | |
| "grad_norm": 8.144444465637207, | |
| "learning_rate": 4.61246146298393e-05, | |
| "loss": 0.6527, | |
| "step": 12000 | |
| }, | |
| { | |
| "ar_loss": 0.4851, | |
| "epoch": 1.0676691729323309, | |
| "fm_loss": 0.1695, | |
| "grad_norm": 4.188894271850586, | |
| "learning_rate": 4.606115351545043e-05, | |
| "loss": 0.6546, | |
| "step": 12100 | |
| }, | |
| { | |
| "ar_loss": 0.4853, | |
| "epoch": 1.0827067669172932, | |
| "fm_loss": 0.1685, | |
| "grad_norm": 2.112070083618164, | |
| "learning_rate": 4.599722140924645e-05, | |
| "loss": 0.6538, | |
| "step": 12200 | |
| }, | |
| { | |
| "ar_loss": 0.4862, | |
| "epoch": 1.0977443609022557, | |
| "fm_loss": 0.1684, | |
| "grad_norm": 5.455174922943115, | |
| "learning_rate": 4.593281974094483e-05, | |
| "loss": 0.6547, | |
| "step": 12300 | |
| }, | |
| { | |
| "ar_loss": 0.4887, | |
| "epoch": 1.112781954887218, | |
| "fm_loss": 0.1682, | |
| "grad_norm": 2.762221574783325, | |
| "learning_rate": 4.5867949950763864e-05, | |
| "loss": 0.6569, | |
| "step": 12400 | |
| }, | |
| { | |
| "ar_loss": 0.4829, | |
| "epoch": 1.1278195488721805, | |
| "fm_loss": 0.1683, | |
| "grad_norm": 1.505560278892517, | |
| "learning_rate": 4.5802613489390487e-05, | |
| "loss": 0.6511, | |
| "step": 12500 | |
| }, | |
| { | |
| "ar_loss": 0.487, | |
| "epoch": 1.1428571428571428, | |
| "fm_loss": 0.1686, | |
| "grad_norm": 12.36954402923584, | |
| "learning_rate": 4.5736811817947824e-05, | |
| "loss": 0.6557, | |
| "step": 12600 | |
| }, | |
| { | |
| "ar_loss": 0.4796, | |
| "epoch": 1.1578947368421053, | |
| "fm_loss": 0.1675, | |
| "grad_norm": 1.7835297584533691, | |
| "learning_rate": 4.5670546407962525e-05, | |
| "loss": 0.6471, | |
| "step": 12700 | |
| }, | |
| { | |
| "ar_loss": 0.4841, | |
| "epoch": 1.1729323308270676, | |
| "fm_loss": 0.1685, | |
| "grad_norm": 1.500798225402832, | |
| "learning_rate": 4.560381874133186e-05, | |
| "loss": 0.6526, | |
| "step": 12800 | |
| }, | |
| { | |
| "ar_loss": 0.4818, | |
| "epoch": 1.1879699248120301, | |
| "fm_loss": 0.1682, | |
| "grad_norm": 3.4301960468292236, | |
| "learning_rate": 4.553663031029055e-05, | |
| "loss": 0.65, | |
| "step": 12900 | |
| }, | |
| { | |
| "ar_loss": 0.4821, | |
| "epoch": 1.2030075187969924, | |
| "fm_loss": 0.1699, | |
| "grad_norm": 1.9192440509796143, | |
| "learning_rate": 4.546898261737745e-05, | |
| "loss": 0.6519, | |
| "step": 13000 | |
| }, | |
| { | |
| "ar_loss": 0.4854, | |
| "epoch": 1.218045112781955, | |
| "fm_loss": 0.1688, | |
| "grad_norm": 2.5409817695617676, | |
| "learning_rate": 4.540087717540188e-05, | |
| "loss": 0.6542, | |
| "step": 13100 | |
| }, | |
| { | |
| "ar_loss": 0.4807, | |
| "epoch": 1.2330827067669172, | |
| "fm_loss": 0.1684, | |
| "grad_norm": 1.6167821884155273, | |
| "learning_rate": 4.533231550740985e-05, | |
| "loss": 0.6491, | |
| "step": 13200 | |
| }, | |
| { | |
| "ar_loss": 0.4793, | |
| "epoch": 1.2481203007518797, | |
| "fm_loss": 0.1673, | |
| "grad_norm": 2.658431053161621, | |
| "learning_rate": 4.526329914664999e-05, | |
| "loss": 0.6466, | |
| "step": 13300 | |
| }, | |
| { | |
| "ar_loss": 0.4802, | |
| "epoch": 1.263157894736842, | |
| "fm_loss": 0.1678, | |
| "grad_norm": 1.7002625465393066, | |
| "learning_rate": 4.51938296365392e-05, | |
| "loss": 0.6481, | |
| "step": 13400 | |
| }, | |
| { | |
| "ar_loss": 0.4799, | |
| "epoch": 1.2781954887218046, | |
| "fm_loss": 0.1694, | |
| "grad_norm": 2.5322110652923584, | |
| "learning_rate": 4.5123908530628254e-05, | |
| "loss": 0.6493, | |
| "step": 13500 | |
| }, | |
| { | |
| "ar_loss": 0.4807, | |
| "epoch": 1.2932330827067668, | |
| "fm_loss": 0.1674, | |
| "grad_norm": 1.8980706930160522, | |
| "learning_rate": 4.5053537392566946e-05, | |
| "loss": 0.6481, | |
| "step": 13600 | |
| }, | |
| { | |
| "ar_loss": 0.4818, | |
| "epoch": 1.3082706766917294, | |
| "fm_loss": 0.1688, | |
| "grad_norm": 1.619292140007019, | |
| "learning_rate": 4.4982717796069176e-05, | |
| "loss": 0.6507, | |
| "step": 13700 | |
| }, | |
| { | |
| "ar_loss": 0.4841, | |
| "epoch": 1.3233082706766917, | |
| "fm_loss": 0.1684, | |
| "grad_norm": 1.7827472686767578, | |
| "learning_rate": 4.491145132487775e-05, | |
| "loss": 0.6526, | |
| "step": 13800 | |
| }, | |
| { | |
| "ar_loss": 0.4794, | |
| "epoch": 1.3383458646616542, | |
| "fm_loss": 0.1779, | |
| "grad_norm": 2.303715944290161, | |
| "learning_rate": 4.483973957272895e-05, | |
| "loss": 0.6572, | |
| "step": 13900 | |
| }, | |
| { | |
| "ar_loss": 0.4729, | |
| "epoch": 1.3533834586466165, | |
| "fm_loss": 0.1767, | |
| "grad_norm": 2.095193386077881, | |
| "learning_rate": 4.476758414331691e-05, | |
| "loss": 0.6496, | |
| "step": 14000 | |
| }, | |
| { | |
| "ar_loss": 0.4765, | |
| "epoch": 1.368421052631579, | |
| "fm_loss": 0.1679, | |
| "grad_norm": 3.301593065261841, | |
| "learning_rate": 4.4694986650257754e-05, | |
| "loss": 0.6445, | |
| "step": 14100 | |
| }, | |
| { | |
| "ar_loss": 0.4787, | |
| "epoch": 1.3834586466165413, | |
| "fm_loss": 0.1675, | |
| "grad_norm": 4.087191581726074, | |
| "learning_rate": 4.462194871705347e-05, | |
| "loss": 0.6462, | |
| "step": 14200 | |
| }, | |
| { | |
| "ar_loss": 0.4788, | |
| "epoch": 1.3984962406015038, | |
| "fm_loss": 0.1666, | |
| "grad_norm": 1.5592633485794067, | |
| "learning_rate": 4.4548471977055665e-05, | |
| "loss": 0.6455, | |
| "step": 14300 | |
| }, | |
| { | |
| "ar_loss": 0.4747, | |
| "epoch": 1.413533834586466, | |
| "fm_loss": 0.1682, | |
| "grad_norm": 7.437148571014404, | |
| "learning_rate": 4.447455807342901e-05, | |
| "loss": 0.6429, | |
| "step": 14400 | |
| }, | |
| { | |
| "ar_loss": 0.4798, | |
| "epoch": 1.4285714285714286, | |
| "fm_loss": 0.1674, | |
| "grad_norm": 1.5794994831085205, | |
| "learning_rate": 4.440020865911446e-05, | |
| "loss": 0.6472, | |
| "step": 14500 | |
| }, | |
| { | |
| "ar_loss": 0.4729, | |
| "epoch": 1.443609022556391, | |
| "fm_loss": 0.1674, | |
| "grad_norm": 2.327096700668335, | |
| "learning_rate": 4.432542539679235e-05, | |
| "loss": 0.6404, | |
| "step": 14600 | |
| }, | |
| { | |
| "ar_loss": 0.4711, | |
| "epoch": 1.4586466165413534, | |
| "fm_loss": 0.1699, | |
| "grad_norm": 1.5044869184494019, | |
| "learning_rate": 4.425020995884517e-05, | |
| "loss": 0.641, | |
| "step": 14700 | |
| }, | |
| { | |
| "ar_loss": 0.4757, | |
| "epoch": 1.4736842105263157, | |
| "fm_loss": 0.1678, | |
| "grad_norm": 1.8027641773223877, | |
| "learning_rate": 4.41745640273202e-05, | |
| "loss": 0.6435, | |
| "step": 14800 | |
| }, | |
| { | |
| "ar_loss": 0.4727, | |
| "epoch": 1.4887218045112782, | |
| "fm_loss": 0.1716, | |
| "grad_norm": 1.5030885934829712, | |
| "learning_rate": 4.4098489293891845e-05, | |
| "loss": 0.6443, | |
| "step": 14900 | |
| }, | |
| { | |
| "ar_loss": 0.4741, | |
| "epoch": 1.5037593984962405, | |
| "fm_loss": 0.1678, | |
| "grad_norm": 2.254826068878174, | |
| "learning_rate": 4.4021987459823834e-05, | |
| "loss": 0.6419, | |
| "step": 15000 | |
| }, | |
| { | |
| "ar_loss": 0.4636, | |
| "epoch": 2.015368065160596, | |
| "fm_loss": 0.1667, | |
| "grad_norm": 5.636004447937012, | |
| "learning_rate": 4.3687046036407485e-05, | |
| "loss": 0.6303, | |
| "step": 15100 | |
| }, | |
| { | |
| "ar_loss": 0.4622, | |
| "epoch": 2.0307361303211926, | |
| "fm_loss": 0.1678, | |
| "grad_norm": 2.2144827842712402, | |
| "learning_rate": 4.360656750389484e-05, | |
| "loss": 0.63, | |
| "step": 15200 | |
| }, | |
| { | |
| "ar_loss": 0.4643, | |
| "epoch": 2.0461041954817887, | |
| "fm_loss": 0.1659, | |
| "grad_norm": 1.623849868774414, | |
| "learning_rate": 4.3525654376107785e-05, | |
| "loss": 0.6302, | |
| "step": 15300 | |
| }, | |
| { | |
| "ar_loss": 0.4617, | |
| "epoch": 2.061472260642385, | |
| "fm_loss": 0.1671, | |
| "grad_norm": 10.88976001739502, | |
| "learning_rate": 4.344430854294155e-05, | |
| "loss": 0.6288, | |
| "step": 15400 | |
| }, | |
| { | |
| "ar_loss": 0.4622, | |
| "epoch": 2.0768403258029813, | |
| "fm_loss": 0.1679, | |
| "grad_norm": 2.079127550125122, | |
| "learning_rate": 4.3362531904398086e-05, | |
| "loss": 0.6301, | |
| "step": 15500 | |
| }, | |
| { | |
| "ar_loss": 0.4593, | |
| "epoch": 2.092208390963578, | |
| "fm_loss": 0.1663, | |
| "grad_norm": 3.9787468910217285, | |
| "learning_rate": 4.3280326370541716e-05, | |
| "loss": 0.6256, | |
| "step": 15600 | |
| }, | |
| { | |
| "ar_loss": 0.4606, | |
| "epoch": 2.107576456124174, | |
| "fm_loss": 0.1686, | |
| "grad_norm": 2.992798089981079, | |
| "learning_rate": 4.3197693861454505e-05, | |
| "loss": 0.6292, | |
| "step": 15700 | |
| }, | |
| { | |
| "ar_loss": 0.4612, | |
| "epoch": 2.1229445212847704, | |
| "fm_loss": 0.1693, | |
| "grad_norm": 1.920535922050476, | |
| "learning_rate": 4.31146363071914e-05, | |
| "loss": 0.6305, | |
| "step": 15800 | |
| }, | |
| { | |
| "ar_loss": 0.4626, | |
| "epoch": 2.1383125864453665, | |
| "fm_loss": 0.1793, | |
| "grad_norm": 1.652984380722046, | |
| "learning_rate": 4.303115564773521e-05, | |
| "loss": 0.6419, | |
| "step": 15900 | |
| }, | |
| { | |
| "ar_loss": 0.4583, | |
| "epoch": 2.153680651605963, | |
| "fm_loss": 0.1665, | |
| "grad_norm": 1.9141411781311035, | |
| "learning_rate": 4.294725383295121e-05, | |
| "loss": 0.6248, | |
| "step": 16000 | |
| }, | |
| { | |
| "ar_loss": 0.4646, | |
| "epoch": 2.169048716766559, | |
| "fm_loss": 0.1762, | |
| "grad_norm": 1.7625855207443237, | |
| "learning_rate": 4.286293282254165e-05, | |
| "loss": 0.6407, | |
| "step": 16100 | |
| }, | |
| { | |
| "ar_loss": 0.4622, | |
| "epoch": 2.1844167819271556, | |
| "fm_loss": 0.1674, | |
| "grad_norm": 1.4725229740142822, | |
| "learning_rate": 4.2778194585999965e-05, | |
| "loss": 0.6295, | |
| "step": 16200 | |
| }, | |
| { | |
| "ar_loss": 0.4642, | |
| "epoch": 2.1997848470877517, | |
| "fm_loss": 0.1654, | |
| "grad_norm": 2.979617118835449, | |
| "learning_rate": 4.269304110256479e-05, | |
| "loss": 0.6296, | |
| "step": 16300 | |
| }, | |
| { | |
| "ar_loss": 0.4605, | |
| "epoch": 2.2151529122483478, | |
| "fm_loss": 0.1656, | |
| "grad_norm": 4.4237470626831055, | |
| "learning_rate": 4.2607474361173714e-05, | |
| "loss": 0.6262, | |
| "step": 16400 | |
| }, | |
| { | |
| "ar_loss": 0.4597, | |
| "epoch": 2.2305209774089443, | |
| "fm_loss": 0.1673, | |
| "grad_norm": 3.3088274002075195, | |
| "learning_rate": 4.2521496360416834e-05, | |
| "loss": 0.627, | |
| "step": 16500 | |
| }, | |
| { | |
| "ar_loss": 0.4588, | |
| "epoch": 2.2458890425695404, | |
| "fm_loss": 0.166, | |
| "grad_norm": 2.4934027194976807, | |
| "learning_rate": 4.243510910849006e-05, | |
| "loss": 0.6248, | |
| "step": 16600 | |
| }, | |
| { | |
| "ar_loss": 0.4577, | |
| "epoch": 2.261257107730137, | |
| "fm_loss": 0.1654, | |
| "grad_norm": 3.4698071479797363, | |
| "learning_rate": 4.234831462314822e-05, | |
| "loss": 0.6231, | |
| "step": 16700 | |
| }, | |
| { | |
| "ar_loss": 0.459, | |
| "epoch": 2.276625172890733, | |
| "fm_loss": 0.1663, | |
| "grad_norm": 1.6527596712112427, | |
| "learning_rate": 4.226111493165793e-05, | |
| "loss": 0.6253, | |
| "step": 16800 | |
| }, | |
| { | |
| "ar_loss": 0.4563, | |
| "epoch": 2.2919932380513295, | |
| "fm_loss": 0.1647, | |
| "grad_norm": 2.62147855758667, | |
| "learning_rate": 4.217351207075024e-05, | |
| "loss": 0.621, | |
| "step": 16900 | |
| }, | |
| { | |
| "ar_loss": 0.4583, | |
| "epoch": 2.3073613032119256, | |
| "fm_loss": 0.1651, | |
| "grad_norm": 2.403658151626587, | |
| "learning_rate": 4.208550808657309e-05, | |
| "loss": 0.6234, | |
| "step": 17000 | |
| }, | |
| { | |
| "ar_loss": 0.4565, | |
| "epoch": 2.322729368372522, | |
| "fm_loss": 0.1651, | |
| "grad_norm": 2.243856906890869, | |
| "learning_rate": 4.199710503464345e-05, | |
| "loss": 0.6216, | |
| "step": 17100 | |
| }, | |
| { | |
| "ar_loss": 0.456, | |
| "epoch": 2.338097433533118, | |
| "fm_loss": 0.1651, | |
| "grad_norm": 2.199291229248047, | |
| "learning_rate": 4.190830497979938e-05, | |
| "loss": 0.6211, | |
| "step": 17200 | |
| }, | |
| { | |
| "ar_loss": 0.4532, | |
| "epoch": 2.3534654986937147, | |
| "fm_loss": 0.1656, | |
| "grad_norm": 2.230714797973633, | |
| "learning_rate": 4.1819109996151775e-05, | |
| "loss": 0.6188, | |
| "step": 17300 | |
| }, | |
| { | |
| "ar_loss": 0.4526, | |
| "epoch": 2.3688335638543108, | |
| "fm_loss": 0.1651, | |
| "grad_norm": 2.124840259552002, | |
| "learning_rate": 4.172952216703588e-05, | |
| "loss": 0.6176, | |
| "step": 17400 | |
| }, | |
| { | |
| "ar_loss": 0.4502, | |
| "epoch": 2.384201629014907, | |
| "fm_loss": 0.165, | |
| "grad_norm": 2.1426265239715576, | |
| "learning_rate": 4.1639543584962726e-05, | |
| "loss": 0.6152, | |
| "step": 17500 | |
| }, | |
| { | |
| "ar_loss": 0.4542, | |
| "epoch": 2.3995696941755034, | |
| "fm_loss": 0.1651, | |
| "grad_norm": 4.364583492279053, | |
| "learning_rate": 4.154917635157015e-05, | |
| "loss": 0.6193, | |
| "step": 17600 | |
| }, | |
| { | |
| "ar_loss": 0.4558, | |
| "epoch": 2.4149377593360994, | |
| "fm_loss": 0.1651, | |
| "grad_norm": 1.8425495624542236, | |
| "learning_rate": 4.145842257757377e-05, | |
| "loss": 0.621, | |
| "step": 17700 | |
| }, | |
| { | |
| "ar_loss": 0.4533, | |
| "epoch": 2.430305824496696, | |
| "fm_loss": 0.1657, | |
| "grad_norm": 1.7966125011444092, | |
| "learning_rate": 4.136728438271768e-05, | |
| "loss": 0.619, | |
| "step": 17800 | |
| }, | |
| { | |
| "ar_loss": 0.4493, | |
| "epoch": 2.445673889657292, | |
| "fm_loss": 0.1651, | |
| "grad_norm": 2.5523791313171387, | |
| "learning_rate": 4.127576389572488e-05, | |
| "loss": 0.6144, | |
| "step": 17900 | |
| }, | |
| { | |
| "ar_loss": 0.4501, | |
| "epoch": 2.4610419548178886, | |
| "fm_loss": 0.1647, | |
| "grad_norm": 2.2898178100585938, | |
| "learning_rate": 4.1183863254247655e-05, | |
| "loss": 0.6148, | |
| "step": 18000 | |
| }, | |
| { | |
| "ar_loss": 0.4509, | |
| "epoch": 2.4764100199784846, | |
| "fm_loss": 0.1635, | |
| "grad_norm": 2.085273504257202, | |
| "learning_rate": 4.109158460481758e-05, | |
| "loss": 0.6144, | |
| "step": 18100 | |
| }, | |
| { | |
| "ar_loss": 0.4515, | |
| "epoch": 2.491778085139081, | |
| "fm_loss": 0.1651, | |
| "grad_norm": 2.7309823036193848, | |
| "learning_rate": 4.0998930102795377e-05, | |
| "loss": 0.6166, | |
| "step": 18200 | |
| }, | |
| { | |
| "ar_loss": 0.4491, | |
| "epoch": 2.5071461502996772, | |
| "fm_loss": 0.1645, | |
| "grad_norm": 1.9515680074691772, | |
| "learning_rate": 4.090590191232061e-05, | |
| "loss": 0.6137, | |
| "step": 18300 | |
| }, | |
| { | |
| "ar_loss": 0.4541, | |
| "epoch": 2.5225142154602738, | |
| "fm_loss": 0.1639, | |
| "grad_norm": 3.2586522102355957, | |
| "learning_rate": 4.0812502206261096e-05, | |
| "loss": 0.618, | |
| "step": 18400 | |
| }, | |
| { | |
| "ar_loss": 0.4463, | |
| "epoch": 2.53788228062087, | |
| "fm_loss": 0.1645, | |
| "grad_norm": 3.2903804779052734, | |
| "learning_rate": 4.071873316616219e-05, | |
| "loss": 0.6108, | |
| "step": 18500 | |
| }, | |
| { | |
| "ar_loss": 0.4465, | |
| "epoch": 2.553250345781466, | |
| "fm_loss": 0.1659, | |
| "grad_norm": 3.0805041790008545, | |
| "learning_rate": 4.062459698219583e-05, | |
| "loss": 0.6124, | |
| "step": 18600 | |
| }, | |
| { | |
| "ar_loss": 0.4476, | |
| "epoch": 2.5686184109420624, | |
| "fm_loss": 0.1643, | |
| "grad_norm": 10.558906555175781, | |
| "learning_rate": 4.053009585310933e-05, | |
| "loss": 0.6119, | |
| "step": 18700 | |
| }, | |
| { | |
| "ar_loss": 0.4461, | |
| "epoch": 2.5839864761026585, | |
| "fm_loss": 0.164, | |
| "grad_norm": 1.7276713848114014, | |
| "learning_rate": 4.04352319861741e-05, | |
| "loss": 0.6102, | |
| "step": 18800 | |
| }, | |
| { | |
| "ar_loss": 0.4486, | |
| "epoch": 2.599354541263255, | |
| "fm_loss": 0.1655, | |
| "grad_norm": 2.4759597778320312, | |
| "learning_rate": 4.034000759713401e-05, | |
| "loss": 0.6142, | |
| "step": 18900 | |
| }, | |
| { | |
| "ar_loss": 0.4461, | |
| "epoch": 2.614722606423851, | |
| "fm_loss": 0.164, | |
| "grad_norm": 1.5683785676956177, | |
| "learning_rate": 4.024442491015372e-05, | |
| "loss": 0.6101, | |
| "step": 19000 | |
| }, | |
| { | |
| "ar_loss": 0.4495, | |
| "epoch": 2.6300906715844476, | |
| "fm_loss": 0.1637, | |
| "grad_norm": 3.866807460784912, | |
| "learning_rate": 4.014848615776666e-05, | |
| "loss": 0.6132, | |
| "step": 19100 | |
| }, | |
| { | |
| "ar_loss": 0.447, | |
| "epoch": 2.6454587367450437, | |
| "fm_loss": 0.1635, | |
| "grad_norm": 1.7105318307876587, | |
| "learning_rate": 4.00521935808229e-05, | |
| "loss": 0.6105, | |
| "step": 19200 | |
| }, | |
| { | |
| "ar_loss": 0.4454, | |
| "epoch": 2.66082680190564, | |
| "fm_loss": 0.1634, | |
| "grad_norm": 2.6716930866241455, | |
| "learning_rate": 3.995554942843687e-05, | |
| "loss": 0.6088, | |
| "step": 19300 | |
| }, | |
| { | |
| "ar_loss": 0.4491, | |
| "epoch": 2.6761948670662363, | |
| "fm_loss": 0.1647, | |
| "grad_norm": 1.8674702644348145, | |
| "learning_rate": 3.9858555957934715e-05, | |
| "loss": 0.6137, | |
| "step": 19400 | |
| }, | |
| { | |
| "ar_loss": 0.445, | |
| "epoch": 2.691562932226833, | |
| "fm_loss": 0.1631, | |
| "grad_norm": 2.926995277404785, | |
| "learning_rate": 3.976121543480169e-05, | |
| "loss": 0.6081, | |
| "step": 19500 | |
| }, | |
| { | |
| "ar_loss": 0.4416, | |
| "epoch": 2.706930997387429, | |
| "fm_loss": 0.1641, | |
| "grad_norm": 2.341655731201172, | |
| "learning_rate": 3.966353013262917e-05, | |
| "loss": 0.6057, | |
| "step": 19600 | |
| }, | |
| { | |
| "ar_loss": 0.4406, | |
| "epoch": 2.722299062548025, | |
| "fm_loss": 0.1648, | |
| "grad_norm": 1.6259019374847412, | |
| "learning_rate": 3.956550233306155e-05, | |
| "loss": 0.6054, | |
| "step": 19700 | |
| }, | |
| { | |
| "ar_loss": 0.4439, | |
| "epoch": 2.7376671277086215, | |
| "fm_loss": 0.1626, | |
| "grad_norm": 3.2317330837249756, | |
| "learning_rate": 3.946713432574299e-05, | |
| "loss": 0.6065, | |
| "step": 19800 | |
| }, | |
| { | |
| "ar_loss": 0.4445, | |
| "epoch": 2.7530351928692176, | |
| "fm_loss": 0.1636, | |
| "grad_norm": 2.053748369216919, | |
| "learning_rate": 3.936842840826391e-05, | |
| "loss": 0.6081, | |
| "step": 19900 | |
| }, | |
| { | |
| "ar_loss": 0.444, | |
| "epoch": 2.768403258029814, | |
| "fm_loss": 0.1635, | |
| "grad_norm": 3.10992431640625, | |
| "learning_rate": 3.9269386886107304e-05, | |
| "loss": 0.6076, | |
| "step": 20000 | |
| }, | |
| { | |
| "ar_loss": 0.4431, | |
| "epoch": 2.78377132319041, | |
| "fm_loss": 0.1634, | |
| "grad_norm": 1.6531990766525269, | |
| "learning_rate": 3.9170012072594944e-05, | |
| "loss": 0.6065, | |
| "step": 20100 | |
| }, | |
| { | |
| "ar_loss": 0.4413, | |
| "epoch": 2.7991393883510067, | |
| "fm_loss": 0.1642, | |
| "grad_norm": 2.239360809326172, | |
| "learning_rate": 3.90703062888333e-05, | |
| "loss": 0.6055, | |
| "step": 20200 | |
| }, | |
| { | |
| "ar_loss": 0.4411, | |
| "epoch": 2.814507453511603, | |
| "fm_loss": 0.1635, | |
| "grad_norm": 2.6573100090026855, | |
| "learning_rate": 3.8970271863659366e-05, | |
| "loss": 0.6046, | |
| "step": 20300 | |
| }, | |
| { | |
| "ar_loss": 0.4388, | |
| "epoch": 2.8298755186721993, | |
| "fm_loss": 0.1639, | |
| "grad_norm": 2.9059882164001465, | |
| "learning_rate": 3.886991113358621e-05, | |
| "loss": 0.6027, | |
| "step": 20400 | |
| }, | |
| { | |
| "ar_loss": 0.4413, | |
| "epoch": 2.8452435838327954, | |
| "fm_loss": 0.1647, | |
| "grad_norm": 1.4836231470108032, | |
| "learning_rate": 3.876922644274847e-05, | |
| "loss": 0.6061, | |
| "step": 20500 | |
| }, | |
| { | |
| "ar_loss": 0.4394, | |
| "epoch": 2.860611648993392, | |
| "fm_loss": 0.1639, | |
| "grad_norm": 3.667249917984009, | |
| "learning_rate": 3.866822014284753e-05, | |
| "loss": 0.6033, | |
| "step": 20600 | |
| }, | |
| { | |
| "ar_loss": 0.437, | |
| "epoch": 2.875979714153988, | |
| "fm_loss": 0.1631, | |
| "grad_norm": 2.724766731262207, | |
| "learning_rate": 3.8566894593096646e-05, | |
| "loss": 0.6001, | |
| "step": 20700 | |
| }, | |
| { | |
| "ar_loss": 0.4408, | |
| "epoch": 2.891347779314584, | |
| "fm_loss": 0.1634, | |
| "grad_norm": 3.302048683166504, | |
| "learning_rate": 3.846525216016581e-05, | |
| "loss": 0.6043, | |
| "step": 20800 | |
| }, | |
| { | |
| "ar_loss": 0.4374, | |
| "epoch": 2.9067158444751806, | |
| "fm_loss": 0.1632, | |
| "grad_norm": 5.427467346191406, | |
| "learning_rate": 3.836329521812651e-05, | |
| "loss": 0.6006, | |
| "step": 20900 | |
| }, | |
| { | |
| "ar_loss": 0.4404, | |
| "epoch": 2.922083909635777, | |
| "fm_loss": 0.1632, | |
| "grad_norm": 2.2337872982025146, | |
| "learning_rate": 3.826102614839621e-05, | |
| "loss": 0.6037, | |
| "step": 21000 | |
| }, | |
| { | |
| "ar_loss": 0.4384, | |
| "epoch": 2.937451974796373, | |
| "fm_loss": 0.1629, | |
| "grad_norm": 13.053716659545898, | |
| "learning_rate": 3.815844733968281e-05, | |
| "loss": 0.6014, | |
| "step": 21100 | |
| }, | |
| { | |
| "ar_loss": 0.4374, | |
| "epoch": 2.9528200399569693, | |
| "fm_loss": 0.1623, | |
| "grad_norm": 2.655907392501831, | |
| "learning_rate": 3.8055561187928776e-05, | |
| "loss": 0.5997, | |
| "step": 21200 | |
| }, | |
| { | |
| "ar_loss": 0.4344, | |
| "epoch": 2.968188105117566, | |
| "fm_loss": 0.163, | |
| "grad_norm": 4.967069625854492, | |
| "learning_rate": 3.795237009625523e-05, | |
| "loss": 0.5974, | |
| "step": 21300 | |
| }, | |
| { | |
| "ar_loss": 0.4384, | |
| "epoch": 2.983556170278162, | |
| "fm_loss": 0.1639, | |
| "grad_norm": 1.5047295093536377, | |
| "learning_rate": 3.784887647490581e-05, | |
| "loss": 0.6023, | |
| "step": 21400 | |
| }, | |
| { | |
| "ar_loss": 0.435, | |
| "epoch": 2.9989242354387584, | |
| "fm_loss": 0.1626, | |
| "grad_norm": 2.5669796466827393, | |
| "learning_rate": 3.774508274119035e-05, | |
| "loss": 0.5976, | |
| "step": 21500 | |
| }, | |
| { | |
| "ar_loss": 0.4195, | |
| "epoch": 3.0142923005993545, | |
| "fm_loss": 0.1629, | |
| "grad_norm": 2.004462718963623, | |
| "learning_rate": 3.764099131942846e-05, | |
| "loss": 0.5825, | |
| "step": 21600 | |
| }, | |
| { | |
| "ar_loss": 0.4153, | |
| "epoch": 3.029660365759951, | |
| "fm_loss": 0.1619, | |
| "grad_norm": 4.897035121917725, | |
| "learning_rate": 3.753660464089285e-05, | |
| "loss": 0.5773, | |
| "step": 21700 | |
| }, | |
| { | |
| "ar_loss": 0.414, | |
| "epoch": 3.045028430920547, | |
| "fm_loss": 0.1628, | |
| "grad_norm": 5.358692169189453, | |
| "learning_rate": 3.743192514375257e-05, | |
| "loss": 0.5768, | |
| "step": 21800 | |
| }, | |
| { | |
| "ar_loss": 0.417, | |
| "epoch": 3.060396496081143, | |
| "fm_loss": 0.1624, | |
| "grad_norm": 2.0926132202148438, | |
| "learning_rate": 3.732695527301609e-05, | |
| "loss": 0.5793, | |
| "step": 21900 | |
| }, | |
| { | |
| "ar_loss": 0.4168, | |
| "epoch": 3.0757645612417397, | |
| "fm_loss": 0.1626, | |
| "grad_norm": 2.1865603923797607, | |
| "learning_rate": 3.722169748047413e-05, | |
| "loss": 0.5793, | |
| "step": 22000 | |
| }, | |
| { | |
| "ar_loss": 0.4142, | |
| "epoch": 3.0911326264023358, | |
| "fm_loss": 0.163, | |
| "grad_norm": 1.9334361553192139, | |
| "learning_rate": 3.711615422464244e-05, | |
| "loss": 0.5772, | |
| "step": 22100 | |
| }, | |
| { | |
| "ar_loss": 0.4156, | |
| "epoch": 3.1065006915629323, | |
| "fm_loss": 0.163, | |
| "grad_norm": 2.2630386352539062, | |
| "learning_rate": 3.701032797070436e-05, | |
| "loss": 0.5785, | |
| "step": 22200 | |
| }, | |
| { | |
| "ar_loss": 0.4122, | |
| "epoch": 3.1218687567235284, | |
| "fm_loss": 0.1706, | |
| "grad_norm": 1.959439754486084, | |
| "learning_rate": 3.690422119045325e-05, | |
| "loss": 0.5828, | |
| "step": 22300 | |
| }, | |
| { | |
| "ar_loss": 0.4142, | |
| "epoch": 3.137236821884125, | |
| "fm_loss": 0.1717, | |
| "grad_norm": 2.2783379554748535, | |
| "learning_rate": 3.6797836362234745e-05, | |
| "loss": 0.5859, | |
| "step": 22400 | |
| }, | |
| { | |
| "ar_loss": 0.4142, | |
| "epoch": 3.152604887044721, | |
| "fm_loss": 0.1634, | |
| "grad_norm": 2.1327381134033203, | |
| "learning_rate": 3.669117597088885e-05, | |
| "loss": 0.5776, | |
| "step": 22500 | |
| }, | |
| { | |
| "ar_loss": 0.4125, | |
| "epoch": 3.1679729522053175, | |
| "fm_loss": 0.1665, | |
| "grad_norm": 2.315673828125, | |
| "learning_rate": 3.658424250769195e-05, | |
| "loss": 0.579, | |
| "step": 22600 | |
| }, | |
| { | |
| "ar_loss": 0.4148, | |
| "epoch": 3.1833410173659136, | |
| "fm_loss": 0.1629, | |
| "grad_norm": 3.1342201232910156, | |
| "learning_rate": 3.647703847029858e-05, | |
| "loss": 0.5778, | |
| "step": 22700 | |
| }, | |
| { | |
| "ar_loss": 0.4164, | |
| "epoch": 3.19870908252651, | |
| "fm_loss": 0.1615, | |
| "grad_norm": 2.276103973388672, | |
| "learning_rate": 3.6369566362683115e-05, | |
| "loss": 0.5778, | |
| "step": 22800 | |
| }, | |
| { | |
| "ar_loss": 0.415, | |
| "epoch": 3.214077147687106, | |
| "fm_loss": 0.1634, | |
| "grad_norm": 3.8148179054260254, | |
| "learning_rate": 3.626182869508124e-05, | |
| "loss": 0.5784, | |
| "step": 22900 | |
| }, | |
| { | |
| "ar_loss": 0.4135, | |
| "epoch": 3.2294452128477023, | |
| "fm_loss": 0.163, | |
| "grad_norm": 2.029802083969116, | |
| "learning_rate": 3.6153827983931395e-05, | |
| "loss": 0.5765, | |
| "step": 23000 | |
| }, | |
| { | |
| "ar_loss": 0.4108, | |
| "epoch": 3.2448132780082988, | |
| "fm_loss": 0.1616, | |
| "grad_norm": 1.9012216329574585, | |
| "learning_rate": 3.6045566751815906e-05, | |
| "loss": 0.5724, | |
| "step": 23100 | |
| }, | |
| { | |
| "ar_loss": 0.4118, | |
| "epoch": 3.260181343168895, | |
| "fm_loss": 0.1631, | |
| "grad_norm": 1.9454518556594849, | |
| "learning_rate": 3.593704752740214e-05, | |
| "loss": 0.5749, | |
| "step": 23200 | |
| }, | |
| { | |
| "ar_loss": 0.4114, | |
| "epoch": 3.2755494083294914, | |
| "fm_loss": 0.1646, | |
| "grad_norm": 5.499399662017822, | |
| "learning_rate": 3.5828272845383395e-05, | |
| "loss": 0.576, | |
| "step": 23300 | |
| }, | |
| { | |
| "ar_loss": 0.4099, | |
| "epoch": 3.2909174734900875, | |
| "fm_loss": 0.1622, | |
| "grad_norm": 2.5215775966644287, | |
| "learning_rate": 3.571924524641973e-05, | |
| "loss": 0.5721, | |
| "step": 23400 | |
| }, | |
| { | |
| "ar_loss": 0.4132, | |
| "epoch": 3.306285538650684, | |
| "fm_loss": 0.1619, | |
| "grad_norm": 1.9290796518325806, | |
| "learning_rate": 3.56099672770786e-05, | |
| "loss": 0.575, | |
| "step": 23500 | |
| }, | |
| { | |
| "ar_loss": 0.4106, | |
| "epoch": 3.32165360381128, | |
| "fm_loss": 0.1653, | |
| "grad_norm": 3.0975518226623535, | |
| "learning_rate": 3.550044148977539e-05, | |
| "loss": 0.5759, | |
| "step": 23600 | |
| }, | |
| { | |
| "ar_loss": 0.4109, | |
| "epoch": 3.3370216689718766, | |
| "fm_loss": 0.1625, | |
| "grad_norm": 3.079251289367676, | |
| "learning_rate": 3.539067044271378e-05, | |
| "loss": 0.5734, | |
| "step": 23700 | |
| }, | |
| { | |
| "ar_loss": 0.4106, | |
| "epoch": 3.3523897341324727, | |
| "fm_loss": 0.162, | |
| "grad_norm": 3.6910111904144287, | |
| "learning_rate": 3.5280656699826016e-05, | |
| "loss": 0.5726, | |
| "step": 23800 | |
| }, | |
| { | |
| "ar_loss": 0.4081, | |
| "epoch": 3.367757799293069, | |
| "fm_loss": 0.1616, | |
| "grad_norm": 2.8438339233398438, | |
| "learning_rate": 3.5170402830713004e-05, | |
| "loss": 0.5698, | |
| "step": 23900 | |
| }, | |
| { | |
| "ar_loss": 0.4122, | |
| "epoch": 3.3831258644536653, | |
| "fm_loss": 0.1616, | |
| "grad_norm": 5.298975467681885, | |
| "learning_rate": 3.505991141058431e-05, | |
| "loss": 0.5738, | |
| "step": 24000 | |
| }, | |
| { | |
| "ar_loss": 0.4107, | |
| "epoch": 3.3984939296142613, | |
| "fm_loss": 0.1614, | |
| "grad_norm": 2.1549863815307617, | |
| "learning_rate": 3.494918502019798e-05, | |
| "loss": 0.5721, | |
| "step": 24100 | |
| }, | |
| { | |
| "ar_loss": 0.4095, | |
| "epoch": 3.413861994774858, | |
| "fm_loss": 0.1615, | |
| "grad_norm": 2.1645219326019287, | |
| "learning_rate": 3.483822624580031e-05, | |
| "loss": 0.571, | |
| "step": 24200 | |
| }, | |
| { | |
| "ar_loss": 0.4102, | |
| "epoch": 3.429230059935454, | |
| "fm_loss": 0.1623, | |
| "grad_norm": 3.294847249984741, | |
| "learning_rate": 3.472703767906539e-05, | |
| "loss": 0.5724, | |
| "step": 24300 | |
| }, | |
| { | |
| "ar_loss": 0.4116, | |
| "epoch": 3.4445981250960505, | |
| "fm_loss": 0.1628, | |
| "grad_norm": 2.6318018436431885, | |
| "learning_rate": 3.461562191703459e-05, | |
| "loss": 0.5744, | |
| "step": 24400 | |
| }, | |
| { | |
| "ar_loss": 0.4116, | |
| "epoch": 3.4599661902566465, | |
| "fm_loss": 0.161, | |
| "grad_norm": 4.470794677734375, | |
| "learning_rate": 3.450398156205592e-05, | |
| "loss": 0.5726, | |
| "step": 24500 | |
| }, | |
| { | |
| "ar_loss": 0.4101, | |
| "epoch": 3.475334255417243, | |
| "fm_loss": 0.1609, | |
| "grad_norm": 3.786229133605957, | |
| "learning_rate": 3.43921192217232e-05, | |
| "loss": 0.571, | |
| "step": 24600 | |
| }, | |
| { | |
| "ar_loss": 0.4085, | |
| "epoch": 3.490702320577839, | |
| "fm_loss": 0.1606, | |
| "grad_norm": 3.3834242820739746, | |
| "learning_rate": 3.42800375088152e-05, | |
| "loss": 0.5691, | |
| "step": 24700 | |
| }, | |
| { | |
| "ar_loss": 0.4106, | |
| "epoch": 3.5060703857384357, | |
| "fm_loss": 0.1612, | |
| "grad_norm": 2.6395816802978516, | |
| "learning_rate": 3.4167739041234595e-05, | |
| "loss": 0.5718, | |
| "step": 24800 | |
| }, | |
| { | |
| "ar_loss": 0.4095, | |
| "epoch": 3.5214384508990317, | |
| "fm_loss": 0.161, | |
| "grad_norm": 3.1765825748443604, | |
| "learning_rate": 3.405522644194682e-05, | |
| "loss": 0.5705, | |
| "step": 24900 | |
| }, | |
| { | |
| "ar_loss": 0.4098, | |
| "epoch": 3.5368065160596283, | |
| "fm_loss": 0.1619, | |
| "grad_norm": 2.2531938552856445, | |
| "learning_rate": 3.3942502338918795e-05, | |
| "loss": 0.5716, | |
| "step": 25000 | |
| }, | |
| { | |
| "ar_loss": 0.4074, | |
| "epoch": 3.5521745812202243, | |
| "fm_loss": 0.1624, | |
| "grad_norm": 2.6430165767669678, | |
| "learning_rate": 3.382956936505755e-05, | |
| "loss": 0.5698, | |
| "step": 25100 | |
| }, | |
| { | |
| "ar_loss": 0.4096, | |
| "epoch": 3.5675426463808204, | |
| "fm_loss": 0.1607, | |
| "grad_norm": 2.1325323581695557, | |
| "learning_rate": 3.371643015814874e-05, | |
| "loss": 0.5703, | |
| "step": 25200 | |
| }, | |
| { | |
| "ar_loss": 0.4111, | |
| "epoch": 3.582910711541417, | |
| "fm_loss": 0.1614, | |
| "grad_norm": 2.9714601039886475, | |
| "learning_rate": 3.360308736079502e-05, | |
| "loss": 0.5725, | |
| "step": 25300 | |
| }, | |
| { | |
| "ar_loss": 0.4086, | |
| "epoch": 3.5982787767020135, | |
| "fm_loss": 0.1612, | |
| "grad_norm": 2.1280746459960938, | |
| "learning_rate": 3.348954362035432e-05, | |
| "loss": 0.5698, | |
| "step": 25400 | |
| }, | |
| { | |
| "ar_loss": 0.4074, | |
| "epoch": 3.6136468418626095, | |
| "fm_loss": 0.1608, | |
| "grad_norm": 8.301246643066406, | |
| "learning_rate": 3.337580158887802e-05, | |
| "loss": 0.5681, | |
| "step": 25500 | |
| }, | |
| { | |
| "ar_loss": 0.4054, | |
| "epoch": 3.6290149070232056, | |
| "fm_loss": 0.164, | |
| "grad_norm": 2.632568120956421, | |
| "learning_rate": 3.326186392304901e-05, | |
| "loss": 0.5694, | |
| "step": 25600 | |
| }, | |
| { | |
| "ar_loss": 0.4076, | |
| "epoch": 3.644382972183802, | |
| "fm_loss": 0.1616, | |
| "grad_norm": 4.138895511627197, | |
| "learning_rate": 3.314773328411962e-05, | |
| "loss": 0.5692, | |
| "step": 25700 | |
| }, | |
| { | |
| "ar_loss": 0.4059, | |
| "epoch": 3.659751037344398, | |
| "fm_loss": 0.1616, | |
| "grad_norm": 2.5377886295318604, | |
| "learning_rate": 3.3033412337849466e-05, | |
| "loss": 0.5675, | |
| "step": 25800 | |
| }, | |
| { | |
| "ar_loss": 0.4085, | |
| "epoch": 3.6751191025049947, | |
| "fm_loss": 0.1617, | |
| "grad_norm": 2.334913730621338, | |
| "learning_rate": 3.2918903754443195e-05, | |
| "loss": 0.5702, | |
| "step": 25900 | |
| }, | |
| { | |
| "ar_loss": 0.4064, | |
| "epoch": 3.690487167665591, | |
| "fm_loss": 0.1625, | |
| "grad_norm": 1.9625128507614136, | |
| "learning_rate": 3.2804210208488114e-05, | |
| "loss": 0.5689, | |
| "step": 26000 | |
| }, | |
| { | |
| "ar_loss": 0.4057, | |
| "epoch": 3.7058552328261873, | |
| "fm_loss": 0.1617, | |
| "grad_norm": 2.2251503467559814, | |
| "learning_rate": 3.268933437889172e-05, | |
| "loss": 0.5674, | |
| "step": 26100 | |
| }, | |
| { | |
| "ar_loss": 0.4066, | |
| "epoch": 3.7212232979867834, | |
| "fm_loss": 0.1617, | |
| "grad_norm": 3.2878170013427734, | |
| "learning_rate": 3.2574278948819105e-05, | |
| "loss": 0.5683, | |
| "step": 26200 | |
| }, | |
| { | |
| "ar_loss": 0.4097, | |
| "epoch": 3.7365913631473795, | |
| "fm_loss": 0.1618, | |
| "grad_norm": 3.281667947769165, | |
| "learning_rate": 3.2459046605630334e-05, | |
| "loss": 0.5715, | |
| "step": 26300 | |
| }, | |
| { | |
| "ar_loss": 0.406, | |
| "epoch": 3.751959428307976, | |
| "fm_loss": 0.1607, | |
| "grad_norm": 2.7066891193389893, | |
| "learning_rate": 3.234364004081763e-05, | |
| "loss": 0.5667, | |
| "step": 26400 | |
| }, | |
| { | |
| "ar_loss": 0.4041, | |
| "epoch": 3.7673274934685725, | |
| "fm_loss": 0.1614, | |
| "grad_norm": 4.11818790435791, | |
| "learning_rate": 3.222806194994253e-05, | |
| "loss": 0.5655, | |
| "step": 26500 | |
| }, | |
| { | |
| "ar_loss": 0.4039, | |
| "epoch": 3.7826955586291686, | |
| "fm_loss": 0.1611, | |
| "grad_norm": 2.0963382720947266, | |
| "learning_rate": 3.211231503257292e-05, | |
| "loss": 0.565, | |
| "step": 26600 | |
| }, | |
| { | |
| "ar_loss": 0.4039, | |
| "epoch": 3.7980636237897647, | |
| "fm_loss": 0.1612, | |
| "grad_norm": 8.769935607910156, | |
| "learning_rate": 3.199640199221998e-05, | |
| "loss": 0.5651, | |
| "step": 26700 | |
| }, | |
| { | |
| "ar_loss": 0.407, | |
| "epoch": 3.813431688950361, | |
| "fm_loss": 0.1614, | |
| "grad_norm": 4.367818832397461, | |
| "learning_rate": 3.188032553627505e-05, | |
| "loss": 0.5684, | |
| "step": 26800 | |
| }, | |
| { | |
| "ar_loss": 0.4025, | |
| "epoch": 3.8287997541109573, | |
| "fm_loss": 0.1612, | |
| "grad_norm": 3.1772165298461914, | |
| "learning_rate": 3.1764088375946355e-05, | |
| "loss": 0.5637, | |
| "step": 26900 | |
| }, | |
| { | |
| "ar_loss": 0.4051, | |
| "epoch": 3.844167819271554, | |
| "fm_loss": 0.1606, | |
| "grad_norm": 2.3004674911499023, | |
| "learning_rate": 3.1647693226195764e-05, | |
| "loss": 0.5657, | |
| "step": 27000 | |
| }, | |
| { | |
| "ar_loss": 0.4023, | |
| "epoch": 3.85953588443215, | |
| "fm_loss": 0.1609, | |
| "grad_norm": 2.438415050506592, | |
| "learning_rate": 3.1531142805675244e-05, | |
| "loss": 0.5633, | |
| "step": 27100 | |
| }, | |
| { | |
| "ar_loss": 0.405, | |
| "epoch": 3.8749039495927464, | |
| "fm_loss": 0.1615, | |
| "grad_norm": 2.6293303966522217, | |
| "learning_rate": 3.141443983666349e-05, | |
| "loss": 0.5666, | |
| "step": 27200 | |
| }, | |
| { | |
| "ar_loss": 0.4022, | |
| "epoch": 3.8902720147533425, | |
| "fm_loss": 0.1609, | |
| "grad_norm": 3.9701802730560303, | |
| "learning_rate": 3.1297587045002265e-05, | |
| "loss": 0.5631, | |
| "step": 27300 | |
| }, | |
| { | |
| "ar_loss": 0.4008, | |
| "epoch": 3.9056400799139386, | |
| "fm_loss": 0.1615, | |
| "grad_norm": 1.8459994792938232, | |
| "learning_rate": 3.118058716003277e-05, | |
| "loss": 0.5623, | |
| "step": 27400 | |
| }, | |
| { | |
| "ar_loss": 0.4058, | |
| "epoch": 3.921008145074535, | |
| "fm_loss": 0.1612, | |
| "grad_norm": 10.706082344055176, | |
| "learning_rate": 3.106344291453185e-05, | |
| "loss": 0.567, | |
| "step": 27500 | |
| }, | |
| { | |
| "ar_loss": 0.4022, | |
| "epoch": 3.9363762102351316, | |
| "fm_loss": 0.1609, | |
| "grad_norm": 2.411083221435547, | |
| "learning_rate": 3.09461570446482e-05, | |
| "loss": 0.5632, | |
| "step": 27600 | |
| }, | |
| { | |
| "ar_loss": 0.4021, | |
| "epoch": 3.9517442753957277, | |
| "fm_loss": 0.1609, | |
| "grad_norm": 1.9293774366378784, | |
| "learning_rate": 3.082873228983847e-05, | |
| "loss": 0.563, | |
| "step": 27700 | |
| }, | |
| { | |
| "ar_loss": 0.4021, | |
| "epoch": 3.967112340556324, | |
| "fm_loss": 0.1607, | |
| "grad_norm": 2.12009334564209, | |
| "learning_rate": 3.071117139280325e-05, | |
| "loss": 0.5628, | |
| "step": 27800 | |
| }, | |
| { | |
| "ar_loss": 0.4052, | |
| "epoch": 3.9824804057169203, | |
| "fm_loss": 0.16, | |
| "grad_norm": 1.866605281829834, | |
| "learning_rate": 3.059347709942299e-05, | |
| "loss": 0.5652, | |
| "step": 27900 | |
| }, | |
| { | |
| "ar_loss": 0.404, | |
| "epoch": 3.9978484708775164, | |
| "fm_loss": 0.1614, | |
| "grad_norm": 3.483856439590454, | |
| "learning_rate": 3.0475652158693912e-05, | |
| "loss": 0.5653, | |
| "step": 28000 | |
| }, | |
| { | |
| "ar_loss": 0.3897, | |
| "epoch": 4.0132165360381125, | |
| "fm_loss": 0.1654, | |
| "grad_norm": 6.412163734436035, | |
| "learning_rate": 3.0357699322663784e-05, | |
| "loss": 0.5552, | |
| "step": 28100 | |
| }, | |
| { | |
| "ar_loss": 0.3912, | |
| "epoch": 4.028584601198709, | |
| "fm_loss": 0.1622, | |
| "grad_norm": 2.0246286392211914, | |
| "learning_rate": 3.023962134636763e-05, | |
| "loss": 0.5535, | |
| "step": 28200 | |
| }, | |
| { | |
| "ar_loss": 0.3892, | |
| "epoch": 4.0439526663593055, | |
| "fm_loss": 0.1619, | |
| "grad_norm": 1.9690488576889038, | |
| "learning_rate": 3.0121420987763393e-05, | |
| "loss": 0.5511, | |
| "step": 28300 | |
| }, | |
| { | |
| "ar_loss": 0.3891, | |
| "epoch": 4.059320731519902, | |
| "fm_loss": 0.161, | |
| "grad_norm": 2.742201805114746, | |
| "learning_rate": 3.0003101007667485e-05, | |
| "loss": 0.55, | |
| "step": 28400 | |
| }, | |
| { | |
| "ar_loss": 0.3889, | |
| "epoch": 4.074688796680498, | |
| "fm_loss": 0.1608, | |
| "grad_norm": 15.931248664855957, | |
| "learning_rate": 2.9884664169690356e-05, | |
| "loss": 0.5496, | |
| "step": 28500 | |
| }, | |
| { | |
| "ar_loss": 0.3909, | |
| "epoch": 4.090056861841094, | |
| "fm_loss": 0.16, | |
| "grad_norm": 2.156740665435791, | |
| "learning_rate": 2.976611324017191e-05, | |
| "loss": 0.5509, | |
| "step": 28600 | |
| }, | |
| { | |
| "ar_loss": 0.388, | |
| "epoch": 4.105424927001691, | |
| "fm_loss": 0.1659, | |
| "grad_norm": 5.583569049835205, | |
| "learning_rate": 2.9647450988116893e-05, | |
| "loss": 0.5539, | |
| "step": 28700 | |
| }, | |
| { | |
| "ar_loss": 0.3909, | |
| "epoch": 4.120792992162286, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.5985569953918457, | |
| "learning_rate": 2.9528680185130214e-05, | |
| "loss": 0.5505, | |
| "step": 28800 | |
| }, | |
| { | |
| "ar_loss": 0.3892, | |
| "epoch": 4.136161057322883, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 3.106172800064087, | |
| "learning_rate": 2.9409803605352237e-05, | |
| "loss": 0.5488, | |
| "step": 28900 | |
| }, | |
| { | |
| "ar_loss": 0.3863, | |
| "epoch": 4.151529122483479, | |
| "fm_loss": 0.1611, | |
| "grad_norm": 2.211129665374756, | |
| "learning_rate": 2.929082402539395e-05, | |
| "loss": 0.5473, | |
| "step": 29000 | |
| }, | |
| { | |
| "ar_loss": 0.39, | |
| "epoch": 4.166897187644076, | |
| "fm_loss": 0.1602, | |
| "grad_norm": 1.949877142906189, | |
| "learning_rate": 2.9171744224272113e-05, | |
| "loss": 0.5502, | |
| "step": 29100 | |
| }, | |
| { | |
| "ar_loss": 0.387, | |
| "epoch": 4.1822652528046715, | |
| "fm_loss": 0.1646, | |
| "grad_norm": 8.61090087890625, | |
| "learning_rate": 2.9052566983344388e-05, | |
| "loss": 0.5516, | |
| "step": 29200 | |
| }, | |
| { | |
| "ar_loss": 0.391, | |
| "epoch": 4.197633317965268, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 2.0502259731292725, | |
| "learning_rate": 2.893329508624433e-05, | |
| "loss": 0.5506, | |
| "step": 29300 | |
| }, | |
| { | |
| "ar_loss": 0.3897, | |
| "epoch": 4.213001383125865, | |
| "fm_loss": 0.1603, | |
| "grad_norm": 2.44155216217041, | |
| "learning_rate": 2.8813931318816395e-05, | |
| "loss": 0.55, | |
| "step": 29400 | |
| }, | |
| { | |
| "ar_loss": 0.387, | |
| "epoch": 4.228369448286461, | |
| "fm_loss": 0.1616, | |
| "grad_norm": 2.004561185836792, | |
| "learning_rate": 2.869447846905085e-05, | |
| "loss": 0.5487, | |
| "step": 29500 | |
| }, | |
| { | |
| "ar_loss": 0.3875, | |
| "epoch": 4.243737513447057, | |
| "fm_loss": 0.1598, | |
| "grad_norm": 5.270979881286621, | |
| "learning_rate": 2.8574939327018685e-05, | |
| "loss": 0.5473, | |
| "step": 29600 | |
| }, | |
| { | |
| "ar_loss": 0.3873, | |
| "epoch": 4.259105578607653, | |
| "fm_loss": 0.1605, | |
| "grad_norm": 6.422144412994385, | |
| "learning_rate": 2.8455316684806404e-05, | |
| "loss": 0.5478, | |
| "step": 29700 | |
| }, | |
| { | |
| "ar_loss": 0.3856, | |
| "epoch": 4.27447364376825, | |
| "fm_loss": 0.1609, | |
| "grad_norm": 9.331077575683594, | |
| "learning_rate": 2.833561333645085e-05, | |
| "loss": 0.5465, | |
| "step": 29800 | |
| }, | |
| { | |
| "ar_loss": 0.3887, | |
| "epoch": 4.289841708928845, | |
| "fm_loss": 0.1603, | |
| "grad_norm": 3.2531440258026123, | |
| "learning_rate": 2.8215832077873928e-05, | |
| "loss": 0.549, | |
| "step": 29900 | |
| }, | |
| { | |
| "ar_loss": 0.3879, | |
| "epoch": 4.305209774089442, | |
| "fm_loss": 0.1616, | |
| "grad_norm": 4.434958457946777, | |
| "learning_rate": 2.8095975706817283e-05, | |
| "loss": 0.5495, | |
| "step": 30000 | |
| }, | |
| { | |
| "ar_loss": 0.3873, | |
| "epoch": 4.015372790161415, | |
| "fm_loss": 0.16, | |
| "grad_norm": 2.223679542541504, | |
| "learning_rate": 2.7964958224364322e-05, | |
| "loss": 0.5474, | |
| "step": 30100 | |
| }, | |
| { | |
| "ar_loss": 0.3861, | |
| "epoch": 4.0307455803228285, | |
| "fm_loss": 0.1608, | |
| "grad_norm": 2.2263758182525635, | |
| "learning_rate": 2.7844916800976393e-05, | |
| "loss": 0.5468, | |
| "step": 30200 | |
| }, | |
| { | |
| "ar_loss": 0.385, | |
| "epoch": 4.046118370484243, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 3.2188937664031982, | |
| "learning_rate": 2.772480888770234e-05, | |
| "loss": 0.5442, | |
| "step": 30300 | |
| }, | |
| { | |
| "ar_loss": 0.3838, | |
| "epoch": 4.061491160645657, | |
| "fm_loss": 0.1605, | |
| "grad_norm": 2.1031410694122314, | |
| "learning_rate": 2.7604637291640594e-05, | |
| "loss": 0.5443, | |
| "step": 30400 | |
| }, | |
| { | |
| "ar_loss": 0.3874, | |
| "epoch": 4.076863950807072, | |
| "fm_loss": 0.1604, | |
| "grad_norm": 3.646202564239502, | |
| "learning_rate": 2.748440482137793e-05, | |
| "loss": 0.5478, | |
| "step": 30500 | |
| }, | |
| { | |
| "ar_loss": 0.3888, | |
| "epoch": 4.092236740968485, | |
| "fm_loss": 0.16, | |
| "grad_norm": 3.0315663814544678, | |
| "learning_rate": 2.7364114286923865e-05, | |
| "loss": 0.5488, | |
| "step": 30600 | |
| }, | |
| { | |
| "ar_loss": 0.386, | |
| "epoch": 4.1076095311299, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.8235671520233154, | |
| "learning_rate": 2.7243768499644946e-05, | |
| "loss": 0.5457, | |
| "step": 30700 | |
| }, | |
| { | |
| "ar_loss": 0.3889, | |
| "epoch": 4.122982321291314, | |
| "fm_loss": 0.161, | |
| "grad_norm": 2.2409422397613525, | |
| "learning_rate": 2.7123370272199055e-05, | |
| "loss": 0.55, | |
| "step": 30800 | |
| }, | |
| { | |
| "ar_loss": 0.3876, | |
| "epoch": 4.138355111452729, | |
| "fm_loss": 0.1612, | |
| "grad_norm": 2.19675874710083, | |
| "learning_rate": 2.700292241846971e-05, | |
| "loss": 0.5488, | |
| "step": 30900 | |
| }, | |
| { | |
| "ar_loss": 0.3871, | |
| "epoch": 4.153727901614143, | |
| "fm_loss": 0.1601, | |
| "grad_norm": 2.340182304382324, | |
| "learning_rate": 2.6882427753500245e-05, | |
| "loss": 0.5471, | |
| "step": 31000 | |
| }, | |
| { | |
| "ar_loss": 0.3879, | |
| "epoch": 4.169100691775557, | |
| "fm_loss": 0.16, | |
| "grad_norm": 3.1240203380584717, | |
| "learning_rate": 2.676188909342801e-05, | |
| "loss": 0.5479, | |
| "step": 31100 | |
| }, | |
| { | |
| "ar_loss": 0.3832, | |
| "epoch": 4.184473481936972, | |
| "fm_loss": 0.1603, | |
| "grad_norm": 2.1297569274902344, | |
| "learning_rate": 2.664130925541865e-05, | |
| "loss": 0.5436, | |
| "step": 31200 | |
| }, | |
| { | |
| "ar_loss": 0.3889, | |
| "epoch": 4.199846272098386, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 3.824352264404297, | |
| "learning_rate": 2.6520691057600155e-05, | |
| "loss": 0.5488, | |
| "step": 31300 | |
| }, | |
| { | |
| "ar_loss": 0.388, | |
| "epoch": 4.2152190622598, | |
| "fm_loss": 0.1605, | |
| "grad_norm": 1.8939049243927002, | |
| "learning_rate": 2.6400037318997046e-05, | |
| "loss": 0.5485, | |
| "step": 31400 | |
| }, | |
| { | |
| "ar_loss": 0.3886, | |
| "epoch": 4.230591852421214, | |
| "fm_loss": 0.1609, | |
| "grad_norm": 2.476659059524536, | |
| "learning_rate": 2.6279350859464502e-05, | |
| "loss": 0.5495, | |
| "step": 31500 | |
| }, | |
| { | |
| "ar_loss": 0.3888, | |
| "epoch": 4.245964642582629, | |
| "fm_loss": 0.1602, | |
| "grad_norm": 3.117105007171631, | |
| "learning_rate": 2.6158634499622425e-05, | |
| "loss": 0.549, | |
| "step": 31600 | |
| }, | |
| { | |
| "ar_loss": 0.386, | |
| "epoch": 4.261337432744043, | |
| "fm_loss": 0.1603, | |
| "grad_norm": 2.931809663772583, | |
| "learning_rate": 2.6037891060789514e-05, | |
| "loss": 0.5464, | |
| "step": 31700 | |
| }, | |
| { | |
| "ar_loss": 0.382, | |
| "epoch": 4.276710222905457, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 2.1063570976257324, | |
| "learning_rate": 2.5917123364917378e-05, | |
| "loss": 0.5411, | |
| "step": 31800 | |
| }, | |
| { | |
| "ar_loss": 0.3863, | |
| "epoch": 4.292083013066872, | |
| "fm_loss": 0.1601, | |
| "grad_norm": 4.796857833862305, | |
| "learning_rate": 2.5796334234524533e-05, | |
| "loss": 0.5463, | |
| "step": 31900 | |
| }, | |
| { | |
| "ar_loss": 0.3849, | |
| "epoch": 4.307455803228286, | |
| "fm_loss": 0.1612, | |
| "grad_norm": 2.840190887451172, | |
| "learning_rate": 2.567552649263044e-05, | |
| "loss": 0.5461, | |
| "step": 32000 | |
| }, | |
| { | |
| "ar_loss": 0.3876, | |
| "epoch": 4.3228285933897, | |
| "fm_loss": 0.1686, | |
| "grad_norm": 2.64380145072937, | |
| "learning_rate": 2.5554702962689563e-05, | |
| "loss": 0.5562, | |
| "step": 32100 | |
| }, | |
| { | |
| "ar_loss": 0.3855, | |
| "epoch": 4.338201383551114, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 4.3930535316467285, | |
| "learning_rate": 2.5433866468525342e-05, | |
| "loss": 0.5453, | |
| "step": 32200 | |
| }, | |
| { | |
| "ar_loss": 0.3825, | |
| "epoch": 4.353574173712529, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 2.1165313720703125, | |
| "learning_rate": 2.531301983426419e-05, | |
| "loss": 0.5412, | |
| "step": 32300 | |
| }, | |
| { | |
| "ar_loss": 0.3849, | |
| "epoch": 4.3689469638739435, | |
| "fm_loss": 0.1601, | |
| "grad_norm": 3.832287073135376, | |
| "learning_rate": 2.519216588426955e-05, | |
| "loss": 0.5449, | |
| "step": 32400 | |
| }, | |
| { | |
| "ar_loss": 0.3872, | |
| "epoch": 4.384319754035357, | |
| "fm_loss": 0.16, | |
| "grad_norm": 2.8910598754882812, | |
| "learning_rate": 2.507130744307581e-05, | |
| "loss": 0.5472, | |
| "step": 32500 | |
| }, | |
| { | |
| "ar_loss": 0.3847, | |
| "epoch": 4.399692544196772, | |
| "fm_loss": 0.1603, | |
| "grad_norm": 2.692793607711792, | |
| "learning_rate": 2.4950447335322335e-05, | |
| "loss": 0.545, | |
| "step": 32600 | |
| }, | |
| { | |
| "ar_loss": 0.3831, | |
| "epoch": 4.415065334358186, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 2.35683536529541, | |
| "learning_rate": 2.482958838568746e-05, | |
| "loss": 0.5426, | |
| "step": 32700 | |
| }, | |
| { | |
| "ar_loss": 0.3861, | |
| "epoch": 4.4304381245196005, | |
| "fm_loss": 0.159, | |
| "grad_norm": 1.9129565954208374, | |
| "learning_rate": 2.4708733418822427e-05, | |
| "loss": 0.5451, | |
| "step": 32800 | |
| }, | |
| { | |
| "ar_loss": 0.3868, | |
| "epoch": 4.445810914681014, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 4.045548915863037, | |
| "learning_rate": 2.4587885259285396e-05, | |
| "loss": 0.5466, | |
| "step": 32900 | |
| }, | |
| { | |
| "ar_loss": 0.3834, | |
| "epoch": 4.461183704842429, | |
| "fm_loss": 0.1601, | |
| "grad_norm": 2.0863168239593506, | |
| "learning_rate": 2.446704673147544e-05, | |
| "loss": 0.5436, | |
| "step": 33000 | |
| }, | |
| { | |
| "ar_loss": 0.3837, | |
| "epoch": 4.476556495003843, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 2.455124855041504, | |
| "learning_rate": 2.4346220659566513e-05, | |
| "loss": 0.5433, | |
| "step": 33100 | |
| }, | |
| { | |
| "ar_loss": 0.3841, | |
| "epoch": 4.4919292851652575, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 2.560342311859131, | |
| "learning_rate": 2.4225409867441483e-05, | |
| "loss": 0.5434, | |
| "step": 33200 | |
| }, | |
| { | |
| "ar_loss": 0.3813, | |
| "epoch": 4.507302075326672, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 13.45487117767334, | |
| "learning_rate": 2.4104617178626075e-05, | |
| "loss": 0.5402, | |
| "step": 33300 | |
| }, | |
| { | |
| "ar_loss": 0.3854, | |
| "epoch": 4.522674865488086, | |
| "fm_loss": 0.1606, | |
| "grad_norm": 1.9280191659927368, | |
| "learning_rate": 2.3983845416222943e-05, | |
| "loss": 0.546, | |
| "step": 33400 | |
| }, | |
| { | |
| "ar_loss": 0.3861, | |
| "epoch": 4.538047655649501, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 4.483201503753662, | |
| "learning_rate": 2.386309740284562e-05, | |
| "loss": 0.5461, | |
| "step": 33500 | |
| }, | |
| { | |
| "ar_loss": 0.3837, | |
| "epoch": 4.553420445810914, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.269436836242676, | |
| "learning_rate": 2.3742375960552628e-05, | |
| "loss": 0.5434, | |
| "step": 33600 | |
| }, | |
| { | |
| "ar_loss": 0.3845, | |
| "epoch": 4.568793235972329, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 2.5172126293182373, | |
| "learning_rate": 2.3621683910781458e-05, | |
| "loss": 0.544, | |
| "step": 33700 | |
| }, | |
| { | |
| "ar_loss": 0.3859, | |
| "epoch": 4.584166026133743, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 29.834903717041016, | |
| "learning_rate": 2.3501024074282665e-05, | |
| "loss": 0.5445, | |
| "step": 33800 | |
| }, | |
| { | |
| "ar_loss": 0.3864, | |
| "epoch": 4.599538816295158, | |
| "fm_loss": 0.16, | |
| "grad_norm": 2.567058563232422, | |
| "learning_rate": 2.3380399271053953e-05, | |
| "loss": 0.5464, | |
| "step": 33900 | |
| }, | |
| { | |
| "ar_loss": 0.3836, | |
| "epoch": 4.614911606456571, | |
| "fm_loss": 0.1598, | |
| "grad_norm": 2.2282016277313232, | |
| "learning_rate": 2.3259812320274206e-05, | |
| "loss": 0.5434, | |
| "step": 34000 | |
| }, | |
| { | |
| "ar_loss": 0.3875, | |
| "epoch": 4.630284396617986, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 2.367128849029541, | |
| "learning_rate": 2.313926604023767e-05, | |
| "loss": 0.5469, | |
| "step": 34100 | |
| }, | |
| { | |
| "ar_loss": 0.3826, | |
| "epoch": 4.645657186779401, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 2.4427452087402344, | |
| "learning_rate": 2.3018763248288043e-05, | |
| "loss": 0.5413, | |
| "step": 34200 | |
| }, | |
| { | |
| "ar_loss": 0.3856, | |
| "epoch": 4.661029976940815, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 3.8236427307128906, | |
| "learning_rate": 2.289830676075265e-05, | |
| "loss": 0.5453, | |
| "step": 34300 | |
| }, | |
| { | |
| "ar_loss": 0.3857, | |
| "epoch": 4.676402767102229, | |
| "fm_loss": 0.16, | |
| "grad_norm": 3.789658546447754, | |
| "learning_rate": 2.2777899392876596e-05, | |
| "loss": 0.5457, | |
| "step": 34400 | |
| }, | |
| { | |
| "ar_loss": 0.3843, | |
| "epoch": 4.691775557263643, | |
| "fm_loss": 0.1598, | |
| "grad_norm": 7.402520656585693, | |
| "learning_rate": 2.265754395875703e-05, | |
| "loss": 0.5441, | |
| "step": 34500 | |
| }, | |
| { | |
| "ar_loss": 0.3828, | |
| "epoch": 4.707148347425058, | |
| "fm_loss": 0.1606, | |
| "grad_norm": 2.951808452606201, | |
| "learning_rate": 2.2537243271277286e-05, | |
| "loss": 0.5434, | |
| "step": 34600 | |
| }, | |
| { | |
| "ar_loss": 0.3828, | |
| "epoch": 4.722521137586472, | |
| "fm_loss": 0.1602, | |
| "grad_norm": 2.665632724761963, | |
| "learning_rate": 2.241700014204121e-05, | |
| "loss": 0.543, | |
| "step": 34700 | |
| }, | |
| { | |
| "ar_loss": 0.3818, | |
| "epoch": 4.737893927747886, | |
| "fm_loss": 0.1606, | |
| "grad_norm": 2.390002727508545, | |
| "learning_rate": 2.2296817381307425e-05, | |
| "loss": 0.5423, | |
| "step": 34800 | |
| }, | |
| { | |
| "ar_loss": 0.3864, | |
| "epoch": 4.753266717909301, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 5.915409564971924, | |
| "learning_rate": 2.2176697797923653e-05, | |
| "loss": 0.5463, | |
| "step": 34900 | |
| }, | |
| { | |
| "ar_loss": 0.3823, | |
| "epoch": 4.768639508070715, | |
| "fm_loss": 0.165, | |
| "grad_norm": 3.208481550216675, | |
| "learning_rate": 2.205664419926106e-05, | |
| "loss": 0.5473, | |
| "step": 35000 | |
| }, | |
| { | |
| "ar_loss": 0.384, | |
| "epoch": 4.784012298232129, | |
| "fm_loss": 0.1604, | |
| "grad_norm": 2.6016440391540527, | |
| "learning_rate": 2.1936659391148682e-05, | |
| "loss": 0.5444, | |
| "step": 35100 | |
| }, | |
| { | |
| "ar_loss": 0.3839, | |
| "epoch": 4.799385088393543, | |
| "fm_loss": 0.16, | |
| "grad_norm": 7.024256229400635, | |
| "learning_rate": 2.1816746177807777e-05, | |
| "loss": 0.5438, | |
| "step": 35200 | |
| }, | |
| { | |
| "ar_loss": 0.3835, | |
| "epoch": 4.814757878554958, | |
| "fm_loss": 0.1604, | |
| "grad_norm": 2.983032464981079, | |
| "learning_rate": 2.169690736178636e-05, | |
| "loss": 0.5439, | |
| "step": 35300 | |
| }, | |
| { | |
| "ar_loss": 0.3857, | |
| "epoch": 4.830130668716372, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 3.4045064449310303, | |
| "learning_rate": 2.1577145743893652e-05, | |
| "loss": 0.5453, | |
| "step": 35400 | |
| }, | |
| { | |
| "ar_loss": 0.3799, | |
| "epoch": 4.845503458877786, | |
| "fm_loss": 0.1606, | |
| "grad_norm": 3.3932929039001465, | |
| "learning_rate": 2.1457464123134654e-05, | |
| "loss": 0.5406, | |
| "step": 35500 | |
| }, | |
| { | |
| "ar_loss": 0.3815, | |
| "epoch": 4.860876249039201, | |
| "fm_loss": 0.1618, | |
| "grad_norm": 5.131358623504639, | |
| "learning_rate": 2.1337865296644693e-05, | |
| "loss": 0.5433, | |
| "step": 35600 | |
| }, | |
| { | |
| "ar_loss": 0.3839, | |
| "epoch": 4.876249039200615, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.781935453414917, | |
| "learning_rate": 2.1218352059624125e-05, | |
| "loss": 0.5436, | |
| "step": 35700 | |
| }, | |
| { | |
| "ar_loss": 0.3836, | |
| "epoch": 4.8916218293620295, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 4.221926689147949, | |
| "learning_rate": 2.1098927205272888e-05, | |
| "loss": 0.5429, | |
| "step": 35800 | |
| }, | |
| { | |
| "ar_loss": 0.3839, | |
| "epoch": 4.906994619523443, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 3.9555509090423584, | |
| "learning_rate": 2.0979593524725326e-05, | |
| "loss": 0.5434, | |
| "step": 35900 | |
| }, | |
| { | |
| "ar_loss": 0.3815, | |
| "epoch": 4.922367409684858, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.448594331741333, | |
| "learning_rate": 2.0860353806984917e-05, | |
| "loss": 0.5412, | |
| "step": 36000 | |
| }, | |
| { | |
| "ar_loss": 0.3813, | |
| "epoch": 4.937740199846272, | |
| "fm_loss": 0.1604, | |
| "grad_norm": 2.342075824737549, | |
| "learning_rate": 2.074121083885907e-05, | |
| "loss": 0.5417, | |
| "step": 36100 | |
| }, | |
| { | |
| "ar_loss": 0.3825, | |
| "epoch": 4.9531129900076865, | |
| "fm_loss": 0.1601, | |
| "grad_norm": 7.287113666534424, | |
| "learning_rate": 2.0622167404894034e-05, | |
| "loss": 0.5426, | |
| "step": 36200 | |
| }, | |
| { | |
| "ar_loss": 0.3835, | |
| "epoch": 4.9684857801691, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 15.53018569946289, | |
| "learning_rate": 2.0503226287309786e-05, | |
| "loss": 0.5418, | |
| "step": 36300 | |
| }, | |
| { | |
| "ar_loss": 0.3832, | |
| "epoch": 4.983858570330515, | |
| "fm_loss": 0.1783, | |
| "grad_norm": 36.44465637207031, | |
| "learning_rate": 2.0384390265935027e-05, | |
| "loss": 0.5614, | |
| "step": 36400 | |
| }, | |
| { | |
| "ar_loss": 0.382, | |
| "epoch": 4.99923136049193, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 4.1146745681762695, | |
| "learning_rate": 2.02656621181422e-05, | |
| "loss": 0.5415, | |
| "step": 36500 | |
| }, | |
| { | |
| "ar_loss": 0.3779, | |
| "epoch": 5.014604150653343, | |
| "fm_loss": 0.1594, | |
| "grad_norm": 2.2293853759765625, | |
| "learning_rate": 2.0147044618782585e-05, | |
| "loss": 0.5373, | |
| "step": 36600 | |
| }, | |
| { | |
| "ar_loss": 0.3786, | |
| "epoch": 5.029976940814758, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 3.3400254249572754, | |
| "learning_rate": 2.0028540540121444e-05, | |
| "loss": 0.5386, | |
| "step": 36700 | |
| }, | |
| { | |
| "ar_loss": 0.3761, | |
| "epoch": 5.045349730976172, | |
| "fm_loss": 0.1594, | |
| "grad_norm": 2.30407452583313, | |
| "learning_rate": 1.9910152651773235e-05, | |
| "loss": 0.5355, | |
| "step": 36800 | |
| }, | |
| { | |
| "ar_loss": 0.3785, | |
| "epoch": 5.060722521137587, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 2.3268890380859375, | |
| "learning_rate": 1.9791883720636864e-05, | |
| "loss": 0.5376, | |
| "step": 36900 | |
| }, | |
| { | |
| "ar_loss": 0.3771, | |
| "epoch": 5.076095311299, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 4.636923789978027, | |
| "learning_rate": 1.967373651083106e-05, | |
| "loss": 0.5354, | |
| "step": 37000 | |
| }, | |
| { | |
| "ar_loss": 0.3741, | |
| "epoch": 5.091468101460415, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 8.473295211791992, | |
| "learning_rate": 1.9555713783629714e-05, | |
| "loss": 0.5336, | |
| "step": 37100 | |
| }, | |
| { | |
| "ar_loss": 0.3763, | |
| "epoch": 5.10684089162183, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 2.227837562561035, | |
| "learning_rate": 1.9437818297397403e-05, | |
| "loss": 0.5351, | |
| "step": 37200 | |
| }, | |
| { | |
| "ar_loss": 0.3778, | |
| "epoch": 5.1222136817832435, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.5238685607910156, | |
| "learning_rate": 1.9320052807524874e-05, | |
| "loss": 0.5376, | |
| "step": 37300 | |
| }, | |
| { | |
| "ar_loss": 0.3799, | |
| "epoch": 5.137586471944658, | |
| "fm_loss": 0.16, | |
| "grad_norm": 3.3944637775421143, | |
| "learning_rate": 1.9202420066364678e-05, | |
| "loss": 0.5399, | |
| "step": 37400 | |
| }, | |
| { | |
| "ar_loss": 0.3767, | |
| "epoch": 5.152959262106072, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 16.466928482055664, | |
| "learning_rate": 1.908492282316683e-05, | |
| "loss": 0.5351, | |
| "step": 37500 | |
| }, | |
| { | |
| "ar_loss": 0.3743, | |
| "epoch": 5.168332052267487, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 2.3075056076049805, | |
| "learning_rate": 1.8967563824014563e-05, | |
| "loss": 0.5334, | |
| "step": 37600 | |
| }, | |
| { | |
| "ar_loss": 0.3821, | |
| "epoch": 5.1837048424289005, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 2.2367005348205566, | |
| "learning_rate": 1.8850345811760152e-05, | |
| "loss": 0.5415, | |
| "step": 37700 | |
| }, | |
| { | |
| "ar_loss": 0.3746, | |
| "epoch": 5.199077632590315, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 1.8881235122680664, | |
| "learning_rate": 1.873327152596077e-05, | |
| "loss": 0.5342, | |
| "step": 37800 | |
| }, | |
| { | |
| "ar_loss": 0.3776, | |
| "epoch": 5.21445042275173, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 2.585137128829956, | |
| "learning_rate": 1.861634370281453e-05, | |
| "loss": 0.5365, | |
| "step": 37900 | |
| }, | |
| { | |
| "ar_loss": 0.3767, | |
| "epoch": 5.229823212913144, | |
| "fm_loss": 0.1594, | |
| "grad_norm": 3.3256664276123047, | |
| "learning_rate": 1.849956507509647e-05, | |
| "loss": 0.5361, | |
| "step": 38000 | |
| }, | |
| { | |
| "ar_loss": 0.3767, | |
| "epoch": 5.245196003074558, | |
| "fm_loss": 0.1601, | |
| "grad_norm": 2.189387559890747, | |
| "learning_rate": 1.838293837209472e-05, | |
| "loss": 0.5368, | |
| "step": 38100 | |
| }, | |
| { | |
| "ar_loss": 0.3765, | |
| "epoch": 5.260568793235972, | |
| "fm_loss": 0.1638, | |
| "grad_norm": 2.6017394065856934, | |
| "learning_rate": 1.8266466319546712e-05, | |
| "loss": 0.5403, | |
| "step": 38200 | |
| }, | |
| { | |
| "ar_loss": 0.3755, | |
| "epoch": 5.275941583397387, | |
| "fm_loss": 0.1598, | |
| "grad_norm": 3.207014322280884, | |
| "learning_rate": 1.8150151639575466e-05, | |
| "loss": 0.5354, | |
| "step": 38300 | |
| }, | |
| { | |
| "ar_loss": 0.3772, | |
| "epoch": 5.291314373558801, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 3.0959692001342773, | |
| "learning_rate": 1.8033997050625966e-05, | |
| "loss": 0.536, | |
| "step": 38400 | |
| }, | |
| { | |
| "ar_loss": 0.3742, | |
| "epoch": 5.306687163720215, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 2.948103666305542, | |
| "learning_rate": 1.791800526740165e-05, | |
| "loss": 0.5329, | |
| "step": 38500 | |
| }, | |
| { | |
| "ar_loss": 0.3768, | |
| "epoch": 5.322059953881629, | |
| "fm_loss": 0.1633, | |
| "grad_norm": 3.3506641387939453, | |
| "learning_rate": 1.7802179000800927e-05, | |
| "loss": 0.5401, | |
| "step": 38600 | |
| }, | |
| { | |
| "ar_loss": 0.3748, | |
| "epoch": 5.337432744043044, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 1.7779144048690796, | |
| "learning_rate": 1.768652095785385e-05, | |
| "loss": 0.5342, | |
| "step": 38700 | |
| }, | |
| { | |
| "ar_loss": 0.3771, | |
| "epoch": 5.3528055342044585, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 3.081190347671509, | |
| "learning_rate": 1.7571033841658844e-05, | |
| "loss": 0.5365, | |
| "step": 38800 | |
| }, | |
| { | |
| "ar_loss": 0.3769, | |
| "epoch": 5.368178324365872, | |
| "fm_loss": 0.159, | |
| "grad_norm": 2.642840623855591, | |
| "learning_rate": 1.7455720351319516e-05, | |
| "loss": 0.5359, | |
| "step": 38900 | |
| }, | |
| { | |
| "ar_loss": 0.3759, | |
| "epoch": 5.383551114527287, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 3.235140323638916, | |
| "learning_rate": 1.734058318188158e-05, | |
| "loss": 0.5355, | |
| "step": 39000 | |
| }, | |
| { | |
| "ar_loss": 0.3728, | |
| "epoch": 5.398923904688701, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 2.3143417835235596, | |
| "learning_rate": 1.7225625024269877e-05, | |
| "loss": 0.5324, | |
| "step": 39100 | |
| }, | |
| { | |
| "ar_loss": 0.3762, | |
| "epoch": 5.414296694850115, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 2.0119452476501465, | |
| "learning_rate": 1.711084856522548e-05, | |
| "loss": 0.5354, | |
| "step": 39200 | |
| }, | |
| { | |
| "ar_loss": 0.3725, | |
| "epoch": 5.429669485011529, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 2.9905428886413574, | |
| "learning_rate": 1.6996256487242894e-05, | |
| "loss": 0.5321, | |
| "step": 39300 | |
| }, | |
| { | |
| "ar_loss": 0.3746, | |
| "epoch": 5.445042275172944, | |
| "fm_loss": 0.1635, | |
| "grad_norm": 2.617199659347534, | |
| "learning_rate": 1.6881851468507358e-05, | |
| "loss": 0.5382, | |
| "step": 39400 | |
| }, | |
| { | |
| "ar_loss": 0.3746, | |
| "epoch": 5.460415065334358, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 2.1979682445526123, | |
| "learning_rate": 1.6767636182832292e-05, | |
| "loss": 0.5345, | |
| "step": 39500 | |
| }, | |
| { | |
| "ar_loss": 0.3768, | |
| "epoch": 5.475787855495772, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 3.3658335208892822, | |
| "learning_rate": 1.6653613299596748e-05, | |
| "loss": 0.5354, | |
| "step": 39600 | |
| }, | |
| { | |
| "ar_loss": 0.3753, | |
| "epoch": 5.491160645657187, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 3.347707748413086, | |
| "learning_rate": 1.6539785483683063e-05, | |
| "loss": 0.5336, | |
| "step": 39700 | |
| }, | |
| { | |
| "ar_loss": 0.3786, | |
| "epoch": 5.506533435818601, | |
| "fm_loss": 0.1625, | |
| "grad_norm": 2.0319979190826416, | |
| "learning_rate": 1.6426155395414555e-05, | |
| "loss": 0.5411, | |
| "step": 39800 | |
| }, | |
| { | |
| "ar_loss": 0.3765, | |
| "epoch": 5.521906225980016, | |
| "fm_loss": 0.16, | |
| "grad_norm": 2.5716593265533447, | |
| "learning_rate": 1.631272569049336e-05, | |
| "loss": 0.5365, | |
| "step": 39900 | |
| }, | |
| { | |
| "ar_loss": 0.3767, | |
| "epoch": 5.537279016141429, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 2.94095516204834, | |
| "learning_rate": 1.6199499019938363e-05, | |
| "loss": 0.5364, | |
| "step": 40000 | |
| }, | |
| { | |
| "ar_loss": 0.3755, | |
| "epoch": 6.015372790161415, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 2.66264271736145, | |
| "learning_rate": 1.6086478030023248e-05, | |
| "loss": 0.5343, | |
| "step": 40100 | |
| }, | |
| { | |
| "ar_loss": 0.3771, | |
| "epoch": 6.0307455803228285, | |
| "fm_loss": 0.1577, | |
| "grad_norm": 3.340590476989746, | |
| "learning_rate": 1.597366536221462e-05, | |
| "loss": 0.5349, | |
| "step": 40200 | |
| }, | |
| { | |
| "ar_loss": 0.3748, | |
| "epoch": 6.046118370484243, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.351405620574951, | |
| "learning_rate": 1.5861063653110292e-05, | |
| "loss": 0.5345, | |
| "step": 40300 | |
| }, | |
| { | |
| "ar_loss": 0.3747, | |
| "epoch": 6.061491160645657, | |
| "fm_loss": 0.1608, | |
| "grad_norm": 3.0236189365386963, | |
| "learning_rate": 1.5748675534377683e-05, | |
| "loss": 0.5355, | |
| "step": 40400 | |
| }, | |
| { | |
| "ar_loss": 0.3763, | |
| "epoch": 6.076863950807072, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 2.360391139984131, | |
| "learning_rate": 1.563650363269227e-05, | |
| "loss": 0.5351, | |
| "step": 40500 | |
| }, | |
| { | |
| "ar_loss": 0.377, | |
| "epoch": 6.092236740968485, | |
| "fm_loss": 0.1611, | |
| "grad_norm": 2.1837239265441895, | |
| "learning_rate": 1.5524550569676224e-05, | |
| "loss": 0.5381, | |
| "step": 40600 | |
| }, | |
| { | |
| "ar_loss": 0.376, | |
| "epoch": 6.1076095311299, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 3.4195451736450195, | |
| "learning_rate": 1.541281896183715e-05, | |
| "loss": 0.5346, | |
| "step": 40700 | |
| }, | |
| { | |
| "ar_loss": 0.3785, | |
| "epoch": 6.122982321291314, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 3.0176382064819336, | |
| "learning_rate": 1.5301311420506897e-05, | |
| "loss": 0.5377, | |
| "step": 40800 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 6.138355111452729, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 3.1062843799591064, | |
| "learning_rate": 1.5190030551780564e-05, | |
| "loss": 0.5323, | |
| "step": 40900 | |
| }, | |
| { | |
| "ar_loss": 0.374, | |
| "epoch": 6.153727901614143, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 2.2580771446228027, | |
| "learning_rate": 1.5078978956455581e-05, | |
| "loss": 0.5332, | |
| "step": 41000 | |
| }, | |
| { | |
| "ar_loss": 0.3748, | |
| "epoch": 6.169100691775557, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 3.3398499488830566, | |
| "learning_rate": 1.4968159229970914e-05, | |
| "loss": 0.5333, | |
| "step": 41100 | |
| }, | |
| { | |
| "ar_loss": 0.377, | |
| "epoch": 6.184473481936972, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 4.929275989532471, | |
| "learning_rate": 1.4857573962346411e-05, | |
| "loss": 0.5351, | |
| "step": 41200 | |
| }, | |
| { | |
| "ar_loss": 0.3762, | |
| "epoch": 6.199846272098386, | |
| "fm_loss": 0.16, | |
| "grad_norm": 1.9189740419387817, | |
| "learning_rate": 1.4747225738122278e-05, | |
| "loss": 0.5362, | |
| "step": 41300 | |
| }, | |
| { | |
| "ar_loss": 0.3733, | |
| "epoch": 6.2152190622598, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 2.381868600845337, | |
| "learning_rate": 1.4637117136298673e-05, | |
| "loss": 0.5333, | |
| "step": 41400 | |
| }, | |
| { | |
| "ar_loss": 0.3741, | |
| "epoch": 6.230591852421214, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 4.60919713973999, | |
| "learning_rate": 1.452725073027541e-05, | |
| "loss": 0.5332, | |
| "step": 41500 | |
| }, | |
| { | |
| "ar_loss": 0.3768, | |
| "epoch": 6.245964642582629, | |
| "fm_loss": 0.1579, | |
| "grad_norm": 8.782063484191895, | |
| "learning_rate": 1.4417629087791868e-05, | |
| "loss": 0.5347, | |
| "step": 41600 | |
| }, | |
| { | |
| "ar_loss": 0.3755, | |
| "epoch": 6.261337432744043, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 3.957836627960205, | |
| "learning_rate": 1.4308254770866886e-05, | |
| "loss": 0.5344, | |
| "step": 41700 | |
| }, | |
| { | |
| "ar_loss": 0.3779, | |
| "epoch": 6.276710222905457, | |
| "fm_loss": 0.1578, | |
| "grad_norm": 4.637214183807373, | |
| "learning_rate": 1.4199130335738981e-05, | |
| "loss": 0.5357, | |
| "step": 41800 | |
| }, | |
| { | |
| "ar_loss": 0.3745, | |
| "epoch": 6.292083013066872, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 4.1944804191589355, | |
| "learning_rate": 1.409025833280655e-05, | |
| "loss": 0.5334, | |
| "step": 41900 | |
| }, | |
| { | |
| "ar_loss": 0.3738, | |
| "epoch": 6.307455803228286, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 3.7318429946899414, | |
| "learning_rate": 1.3981641306568299e-05, | |
| "loss": 0.5328, | |
| "step": 42000 | |
| }, | |
| { | |
| "ar_loss": 0.3754, | |
| "epoch": 6.3228285933897, | |
| "fm_loss": 0.16, | |
| "grad_norm": 1.8556092977523804, | |
| "learning_rate": 1.3873281795563737e-05, | |
| "loss": 0.5355, | |
| "step": 42100 | |
| }, | |
| { | |
| "ar_loss": 0.3718, | |
| "epoch": 6.338201383551114, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 3.1865854263305664, | |
| "learning_rate": 1.3765182332313859e-05, | |
| "loss": 0.5302, | |
| "step": 42200 | |
| }, | |
| { | |
| "ar_loss": 0.3778, | |
| "epoch": 6.353574173712529, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 2.2085890769958496, | |
| "learning_rate": 1.3657345443261967e-05, | |
| "loss": 0.5371, | |
| "step": 42300 | |
| }, | |
| { | |
| "ar_loss": 0.377, | |
| "epoch": 6.3689469638739435, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 4.625204563140869, | |
| "learning_rate": 1.3549773648714631e-05, | |
| "loss": 0.5363, | |
| "step": 42400 | |
| }, | |
| { | |
| "ar_loss": 0.3775, | |
| "epoch": 6.384319754035357, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 2.3030028343200684, | |
| "learning_rate": 1.3442469462782741e-05, | |
| "loss": 0.5356, | |
| "step": 42500 | |
| }, | |
| { | |
| "ar_loss": 0.3768, | |
| "epoch": 6.399692544196772, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 6.936190128326416, | |
| "learning_rate": 1.3335435393322826e-05, | |
| "loss": 0.5359, | |
| "step": 42600 | |
| }, | |
| { | |
| "ar_loss": 0.374, | |
| "epoch": 6.415065334358186, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 3.345165252685547, | |
| "learning_rate": 1.322867394187836e-05, | |
| "loss": 0.5331, | |
| "step": 42700 | |
| }, | |
| { | |
| "ar_loss": 0.377, | |
| "epoch": 6.4304381245196005, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 3.3900644779205322, | |
| "learning_rate": 1.3122187603621356e-05, | |
| "loss": 0.5366, | |
| "step": 42800 | |
| }, | |
| { | |
| "ar_loss": 0.3728, | |
| "epoch": 6.445810914681014, | |
| "fm_loss": 0.1582, | |
| "grad_norm": 2.5638771057128906, | |
| "learning_rate": 1.3015978867293996e-05, | |
| "loss": 0.531, | |
| "step": 42900 | |
| }, | |
| { | |
| "ar_loss": 0.377, | |
| "epoch": 6.461183704842429, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 2.6694748401641846, | |
| "learning_rate": 1.2910050215150526e-05, | |
| "loss": 0.5358, | |
| "step": 43000 | |
| }, | |
| { | |
| "ar_loss": 0.3739, | |
| "epoch": 6.476556495003843, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 4.689158916473389, | |
| "learning_rate": 1.2804404122899197e-05, | |
| "loss": 0.5332, | |
| "step": 43100 | |
| }, | |
| { | |
| "ar_loss": 0.3763, | |
| "epoch": 6.4919292851652575, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 2.5251529216766357, | |
| "learning_rate": 1.2699043059644444e-05, | |
| "loss": 0.5352, | |
| "step": 43200 | |
| }, | |
| { | |
| "ar_loss": 0.3754, | |
| "epoch": 6.507302075326672, | |
| "fm_loss": 0.1582, | |
| "grad_norm": 2.896928548812866, | |
| "learning_rate": 1.2593969487829133e-05, | |
| "loss": 0.5337, | |
| "step": 43300 | |
| }, | |
| { | |
| "ar_loss": 0.3782, | |
| "epoch": 6.522674865488086, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 8.51708698272705, | |
| "learning_rate": 1.2489185863177032e-05, | |
| "loss": 0.5371, | |
| "step": 43400 | |
| }, | |
| { | |
| "ar_loss": 0.3763, | |
| "epoch": 6.538047655649501, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 2.936885356903076, | |
| "learning_rate": 1.2384694634635433e-05, | |
| "loss": 0.5347, | |
| "step": 43500 | |
| }, | |
| { | |
| "ar_loss": 0.3744, | |
| "epoch": 6.553420445810914, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 2.76309871673584, | |
| "learning_rate": 1.2280498244317883e-05, | |
| "loss": 0.5337, | |
| "step": 43600 | |
| }, | |
| { | |
| "ar_loss": 0.3742, | |
| "epoch": 6.568793235972329, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 2.100710868835449, | |
| "learning_rate": 1.2176599127447147e-05, | |
| "loss": 0.5328, | |
| "step": 43700 | |
| }, | |
| { | |
| "ar_loss": 0.3777, | |
| "epoch": 6.584166026133743, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 2.9072883129119873, | |
| "learning_rate": 1.2072999712298242e-05, | |
| "loss": 0.5365, | |
| "step": 43800 | |
| }, | |
| { | |
| "ar_loss": 0.3757, | |
| "epoch": 6.599538816295158, | |
| "fm_loss": 0.1627, | |
| "grad_norm": 2.924342393875122, | |
| "learning_rate": 1.196970242014176e-05, | |
| "loss": 0.5384, | |
| "step": 43900 | |
| }, | |
| { | |
| "ar_loss": 0.3763, | |
| "epoch": 6.614911606456571, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 2.559717893600464, | |
| "learning_rate": 1.1866709665187205e-05, | |
| "loss": 0.5349, | |
| "step": 44000 | |
| }, | |
| { | |
| "ar_loss": 0.3732, | |
| "epoch": 6.630284396617986, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 6.084836006164551, | |
| "learning_rate": 1.1764023854526593e-05, | |
| "loss": 0.532, | |
| "step": 44100 | |
| }, | |
| { | |
| "ar_loss": 0.3753, | |
| "epoch": 6.645657186779401, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 2.4638426303863525, | |
| "learning_rate": 1.1661647388078211e-05, | |
| "loss": 0.5341, | |
| "step": 44200 | |
| }, | |
| { | |
| "ar_loss": 0.3742, | |
| "epoch": 6.661029976940815, | |
| "fm_loss": 0.1594, | |
| "grad_norm": 2.2063241004943848, | |
| "learning_rate": 1.1559582658530526e-05, | |
| "loss": 0.5335, | |
| "step": 44300 | |
| }, | |
| { | |
| "ar_loss": 0.3743, | |
| "epoch": 6.676402767102229, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 2.847790479660034, | |
| "learning_rate": 1.1457832051286235e-05, | |
| "loss": 0.5333, | |
| "step": 44400 | |
| }, | |
| { | |
| "ar_loss": 0.3724, | |
| "epoch": 6.691775557263643, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 5.002224445343018, | |
| "learning_rate": 1.1356397944406566e-05, | |
| "loss": 0.5315, | |
| "step": 44500 | |
| }, | |
| { | |
| "ar_loss": 0.3739, | |
| "epoch": 6.707148347425058, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 2.6647820472717285, | |
| "learning_rate": 1.125528270855564e-05, | |
| "loss": 0.5331, | |
| "step": 44600 | |
| }, | |
| { | |
| "ar_loss": 0.3758, | |
| "epoch": 6.722521137586472, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 2.1632769107818604, | |
| "learning_rate": 1.1154488706945104e-05, | |
| "loss": 0.5341, | |
| "step": 44700 | |
| }, | |
| { | |
| "ar_loss": 0.3761, | |
| "epoch": 6.737893927747886, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 2.196767568588257, | |
| "learning_rate": 1.105401829527889e-05, | |
| "loss": 0.535, | |
| "step": 44800 | |
| }, | |
| { | |
| "ar_loss": 0.3755, | |
| "epoch": 6.753266717909301, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 3.3876090049743652, | |
| "learning_rate": 1.0953873821698153e-05, | |
| "loss": 0.534, | |
| "step": 44900 | |
| }, | |
| { | |
| "ar_loss": 0.375, | |
| "epoch": 6.768639508070715, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 2.2706313133239746, | |
| "learning_rate": 1.085405762672639e-05, | |
| "loss": 0.5337, | |
| "step": 45000 | |
| }, | |
| { | |
| "ar_loss": 0.3739, | |
| "epoch": 6.784012298232129, | |
| "fm_loss": 0.159, | |
| "grad_norm": 2.5273022651672363, | |
| "learning_rate": 1.0754572043214773e-05, | |
| "loss": 0.5329, | |
| "step": 45100 | |
| }, | |
| { | |
| "ar_loss": 0.3734, | |
| "epoch": 6.799385088393543, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 2.579967498779297, | |
| "learning_rate": 1.0655419396287578e-05, | |
| "loss": 0.533, | |
| "step": 45200 | |
| }, | |
| { | |
| "ar_loss": 0.3717, | |
| "epoch": 6.814757878554958, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 36.556278228759766, | |
| "learning_rate": 1.0556602003287847e-05, | |
| "loss": 0.53, | |
| "step": 45300 | |
| }, | |
| { | |
| "ar_loss": 0.3769, | |
| "epoch": 6.830130668716372, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 5.545002460479736, | |
| "learning_rate": 1.045812217372327e-05, | |
| "loss": 0.5355, | |
| "step": 45400 | |
| }, | |
| { | |
| "ar_loss": 0.3737, | |
| "epoch": 6.845503458877786, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 2.081123113632202, | |
| "learning_rate": 1.0359982209212178e-05, | |
| "loss": 0.5323, | |
| "step": 45500 | |
| }, | |
| { | |
| "ar_loss": 0.3722, | |
| "epoch": 6.860876249039201, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 3.176176071166992, | |
| "learning_rate": 1.0262184403429739e-05, | |
| "loss": 0.5313, | |
| "step": 45600 | |
| }, | |
| { | |
| "ar_loss": 0.3785, | |
| "epoch": 6.876249039200615, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 7.945939540863037, | |
| "learning_rate": 1.016473104205441e-05, | |
| "loss": 0.5369, | |
| "step": 45700 | |
| }, | |
| { | |
| "ar_loss": 0.3752, | |
| "epoch": 6.8916218293620295, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 3.0679447650909424, | |
| "learning_rate": 1.0067624402714438e-05, | |
| "loss": 0.5341, | |
| "step": 45800 | |
| }, | |
| { | |
| "ar_loss": 0.3731, | |
| "epoch": 6.906994619523443, | |
| "fm_loss": 0.159, | |
| "grad_norm": 4.075259685516357, | |
| "learning_rate": 9.970866754934677e-06, | |
| "loss": 0.532, | |
| "step": 45900 | |
| }, | |
| { | |
| "ar_loss": 0.3768, | |
| "epoch": 6.922367409684858, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 2.1700098514556885, | |
| "learning_rate": 9.874460360083537e-06, | |
| "loss": 0.5349, | |
| "step": 46000 | |
| }, | |
| { | |
| "ar_loss": 0.3749, | |
| "epoch": 6.937740199846272, | |
| "fm_loss": 0.1598, | |
| "grad_norm": 2.1425936222076416, | |
| "learning_rate": 9.778407471320134e-06, | |
| "loss": 0.5347, | |
| "step": 46100 | |
| }, | |
| { | |
| "ar_loss": 0.3719, | |
| "epoch": 6.9531129900076865, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 7.313533782958984, | |
| "learning_rate": 9.682710333541622e-06, | |
| "loss": 0.5303, | |
| "step": 46200 | |
| }, | |
| { | |
| "ar_loss": 0.3737, | |
| "epoch": 6.9684857801691, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 1.9248658418655396, | |
| "learning_rate": 9.587371183330723e-06, | |
| "loss": 0.5323, | |
| "step": 46300 | |
| }, | |
| { | |
| "ar_loss": 0.3728, | |
| "epoch": 6.983858570330515, | |
| "fm_loss": 0.1596, | |
| "grad_norm": 2.7737770080566406, | |
| "learning_rate": 9.492392248903505e-06, | |
| "loss": 0.5324, | |
| "step": 46400 | |
| }, | |
| { | |
| "ar_loss": 0.3755, | |
| "epoch": 6.99923136049193, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 2.6747500896453857, | |
| "learning_rate": 9.397775750057206e-06, | |
| "loss": 0.5341, | |
| "step": 46500 | |
| }, | |
| { | |
| "ar_loss": 0.3746, | |
| "epoch": 7.014604150653343, | |
| "fm_loss": 0.1594, | |
| "grad_norm": 6.737438678741455, | |
| "learning_rate": 9.303523898118444e-06, | |
| "loss": 0.534, | |
| "step": 46600 | |
| }, | |
| { | |
| "ar_loss": 0.3741, | |
| "epoch": 7.029976940814758, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 3.4723405838012695, | |
| "learning_rate": 9.209638895891501e-06, | |
| "loss": 0.5331, | |
| "step": 46700 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 7.045349730976172, | |
| "fm_loss": 0.1594, | |
| "grad_norm": 6.770826816558838, | |
| "learning_rate": 9.116122937606835e-06, | |
| "loss": 0.5329, | |
| "step": 46800 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 7.060722521137587, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 11.931926727294922, | |
| "learning_rate": 9.022978208869808e-06, | |
| "loss": 0.5323, | |
| "step": 46900 | |
| }, | |
| { | |
| "ar_loss": 0.3734, | |
| "epoch": 7.076095311299, | |
| "fm_loss": 0.1582, | |
| "grad_norm": 1.7415313720703125, | |
| "learning_rate": 8.930206886609616e-06, | |
| "loss": 0.5316, | |
| "step": 47000 | |
| }, | |
| { | |
| "ar_loss": 0.3734, | |
| "epoch": 7.091468101460415, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 3.001337766647339, | |
| "learning_rate": 8.837811139028377e-06, | |
| "loss": 0.5325, | |
| "step": 47100 | |
| }, | |
| { | |
| "ar_loss": 0.3719, | |
| "epoch": 7.10684089162183, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 2.8166637420654297, | |
| "learning_rate": 8.745793125550477e-06, | |
| "loss": 0.5307, | |
| "step": 47200 | |
| }, | |
| { | |
| "ar_loss": 0.3744, | |
| "epoch": 7.1222136817832435, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 10.10988712310791, | |
| "learning_rate": 8.654154996772114e-06, | |
| "loss": 0.5332, | |
| "step": 47300 | |
| }, | |
| { | |
| "ar_loss": 0.3758, | |
| "epoch": 7.137586471944658, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 2.6831390857696533, | |
| "learning_rate": 8.562898894411017e-06, | |
| "loss": 0.5351, | |
| "step": 47400 | |
| }, | |
| { | |
| "ar_loss": 0.371, | |
| "epoch": 7.152959262106072, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 1.959746241569519, | |
| "learning_rate": 8.472026951256381e-06, | |
| "loss": 0.5302, | |
| "step": 47500 | |
| }, | |
| { | |
| "ar_loss": 0.3719, | |
| "epoch": 7.168332052267487, | |
| "fm_loss": 0.1582, | |
| "grad_norm": 2.5866353511810303, | |
| "learning_rate": 8.38154129111908e-06, | |
| "loss": 0.53, | |
| "step": 47600 | |
| }, | |
| { | |
| "ar_loss": 0.3731, | |
| "epoch": 7.1837048424289005, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 2.6503565311431885, | |
| "learning_rate": 8.29144402878193e-06, | |
| "loss": 0.5314, | |
| "step": 47700 | |
| }, | |
| { | |
| "ar_loss": 0.372, | |
| "epoch": 7.199077632590315, | |
| "fm_loss": 0.1606, | |
| "grad_norm": 3.779261589050293, | |
| "learning_rate": 8.201737269950355e-06, | |
| "loss": 0.5325, | |
| "step": 47800 | |
| }, | |
| { | |
| "ar_loss": 0.371, | |
| "epoch": 7.21445042275173, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 2.4449970722198486, | |
| "learning_rate": 8.112423111203124e-06, | |
| "loss": 0.5295, | |
| "step": 47900 | |
| }, | |
| { | |
| "ar_loss": 0.3738, | |
| "epoch": 7.229823212913144, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 2.2168331146240234, | |
| "learning_rate": 8.023503639943378e-06, | |
| "loss": 0.5322, | |
| "step": 48000 | |
| }, | |
| { | |
| "ar_loss": 0.3758, | |
| "epoch": 7.245196003074558, | |
| "fm_loss": 0.158, | |
| "grad_norm": 2.078047275543213, | |
| "learning_rate": 7.934980934349811e-06, | |
| "loss": 0.5338, | |
| "step": 48100 | |
| }, | |
| { | |
| "ar_loss": 0.3757, | |
| "epoch": 7.260568793235972, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 2.491028308868408, | |
| "learning_rate": 7.846857063328152e-06, | |
| "loss": 0.534, | |
| "step": 48200 | |
| }, | |
| { | |
| "ar_loss": 0.3748, | |
| "epoch": 7.275941583397387, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 3.340336799621582, | |
| "learning_rate": 7.759134086462753e-06, | |
| "loss": 0.5332, | |
| "step": 48300 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 7.291314373558801, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 3.3799524307250977, | |
| "learning_rate": 7.671814053968484e-06, | |
| "loss": 0.5319, | |
| "step": 48400 | |
| }, | |
| { | |
| "ar_loss": 0.3743, | |
| "epoch": 7.306687163720215, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 3.1465113162994385, | |
| "learning_rate": 7.58489900664282e-06, | |
| "loss": 0.533, | |
| "step": 48500 | |
| }, | |
| { | |
| "ar_loss": 0.3723, | |
| "epoch": 7.322059953881629, | |
| "fm_loss": 0.1613, | |
| "grad_norm": 3.0010464191436768, | |
| "learning_rate": 7.49839097581814e-06, | |
| "loss": 0.5336, | |
| "step": 48600 | |
| }, | |
| { | |
| "ar_loss": 0.3727, | |
| "epoch": 7.337432744043044, | |
| "fm_loss": 0.1608, | |
| "grad_norm": 8.70785903930664, | |
| "learning_rate": 7.412291983314237e-06, | |
| "loss": 0.5335, | |
| "step": 48700 | |
| }, | |
| { | |
| "ar_loss": 0.3727, | |
| "epoch": 7.3528055342044585, | |
| "fm_loss": 0.158, | |
| "grad_norm": 3.002357244491577, | |
| "learning_rate": 7.326604041391089e-06, | |
| "loss": 0.5307, | |
| "step": 48800 | |
| }, | |
| { | |
| "ar_loss": 0.3715, | |
| "epoch": 7.368178324365872, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 2.6171886920928955, | |
| "learning_rate": 7.241329152701812e-06, | |
| "loss": 0.53, | |
| "step": 48900 | |
| }, | |
| { | |
| "ar_loss": 0.3744, | |
| "epoch": 7.383551114527287, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 2.283306360244751, | |
| "learning_rate": 7.156469310245864e-06, | |
| "loss": 0.5326, | |
| "step": 49000 | |
| }, | |
| { | |
| "ar_loss": 0.3744, | |
| "epoch": 7.398923904688701, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 6.2937331199646, | |
| "learning_rate": 7.07202649732246e-06, | |
| "loss": 0.5338, | |
| "step": 49100 | |
| }, | |
| { | |
| "ar_loss": 0.3744, | |
| "epoch": 7.414296694850115, | |
| "fm_loss": 0.1601, | |
| "grad_norm": 4.580973148345947, | |
| "learning_rate": 6.988002687484222e-06, | |
| "loss": 0.5345, | |
| "step": 49200 | |
| }, | |
| { | |
| "ar_loss": 0.3728, | |
| "epoch": 7.429669485011529, | |
| "fm_loss": 0.158, | |
| "grad_norm": 2.2313263416290283, | |
| "learning_rate": 6.904399844491058e-06, | |
| "loss": 0.5308, | |
| "step": 49300 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 7.445042275172944, | |
| "fm_loss": 0.1595, | |
| "grad_norm": 3.6399624347686768, | |
| "learning_rate": 6.821219922264252e-06, | |
| "loss": 0.533, | |
| "step": 49400 | |
| }, | |
| { | |
| "ar_loss": 0.3752, | |
| "epoch": 7.460415065334358, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 3.077457904815674, | |
| "learning_rate": 6.73846486484083e-06, | |
| "loss": 0.5337, | |
| "step": 49500 | |
| }, | |
| { | |
| "ar_loss": 0.3743, | |
| "epoch": 7.475787855495772, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 2.1495542526245117, | |
| "learning_rate": 6.6561366063280784e-06, | |
| "loss": 0.5324, | |
| "step": 49600 | |
| }, | |
| { | |
| "ar_loss": 0.3727, | |
| "epoch": 7.491160645657187, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 4.248561382293701, | |
| "learning_rate": 6.574237070858383e-06, | |
| "loss": 0.5311, | |
| "step": 49700 | |
| }, | |
| { | |
| "ar_loss": 0.3725, | |
| "epoch": 7.506533435818601, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 1.8635345697402954, | |
| "learning_rate": 6.492768172544231e-06, | |
| "loss": 0.5314, | |
| "step": 49800 | |
| }, | |
| { | |
| "ar_loss": 0.3739, | |
| "epoch": 7.521906225980016, | |
| "fm_loss": 0.159, | |
| "grad_norm": 4.352962970733643, | |
| "learning_rate": 6.411731815433492e-06, | |
| "loss": 0.5329, | |
| "step": 49900 | |
| }, | |
| { | |
| "ar_loss": 0.3744, | |
| "epoch": 7.537279016141429, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 1.9769291877746582, | |
| "learning_rate": 6.33112989346491e-06, | |
| "loss": 0.5341, | |
| "step": 50000 | |
| }, | |
| { | |
| "ar_loss": 0.3716, | |
| "epoch": 7.552651806302844, | |
| "fm_loss": 0.1728, | |
| "grad_norm": 2.1171858310699463, | |
| "learning_rate": 6.250964290423847e-06, | |
| "loss": 0.5445, | |
| "step": 50100 | |
| }, | |
| { | |
| "ar_loss": 0.3727, | |
| "epoch": 7.568024596464259, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 2.2346949577331543, | |
| "learning_rate": 6.171236879898243e-06, | |
| "loss": 0.5319, | |
| "step": 50200 | |
| }, | |
| { | |
| "ar_loss": 0.3733, | |
| "epoch": 7.5833973866256725, | |
| "fm_loss": 0.1606, | |
| "grad_norm": 3.1455023288726807, | |
| "learning_rate": 6.091949525234838e-06, | |
| "loss": 0.5339, | |
| "step": 50300 | |
| }, | |
| { | |
| "ar_loss": 0.3767, | |
| "epoch": 7.598770176787087, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 4.480979919433594, | |
| "learning_rate": 6.013104079495621e-06, | |
| "loss": 0.535, | |
| "step": 50400 | |
| }, | |
| { | |
| "ar_loss": 0.3706, | |
| "epoch": 7.614142966948501, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 3.0612711906433105, | |
| "learning_rate": 5.934702385414517e-06, | |
| "loss": 0.5299, | |
| "step": 50500 | |
| }, | |
| { | |
| "ar_loss": 0.3729, | |
| "epoch": 7.629515757109916, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 2.3492562770843506, | |
| "learning_rate": 5.856746275354322e-06, | |
| "loss": 0.5313, | |
| "step": 50600 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 7.6448885472713295, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 4.781151294708252, | |
| "learning_rate": 5.77923757126389e-06, | |
| "loss": 0.532, | |
| "step": 50700 | |
| }, | |
| { | |
| "ar_loss": 0.3714, | |
| "epoch": 7.660261337432744, | |
| "fm_loss": 0.159, | |
| "grad_norm": 4.14220666885376, | |
| "learning_rate": 5.702178084635526e-06, | |
| "loss": 0.5304, | |
| "step": 50800 | |
| }, | |
| { | |
| "ar_loss": 0.3755, | |
| "epoch": 7.675634127594158, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 2.7182235717773438, | |
| "learning_rate": 5.625569616462672e-06, | |
| "loss": 0.5341, | |
| "step": 50900 | |
| }, | |
| { | |
| "ar_loss": 0.375, | |
| "epoch": 7.691006917755573, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 2.1390786170959473, | |
| "learning_rate": 5.549413957197797e-06, | |
| "loss": 0.5337, | |
| "step": 51000 | |
| }, | |
| { | |
| "ar_loss": 0.3757, | |
| "epoch": 7.706379707916987, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 15.834261894226074, | |
| "learning_rate": 5.473712886710569e-06, | |
| "loss": 0.5348, | |
| "step": 51100 | |
| }, | |
| { | |
| "ar_loss": 0.3752, | |
| "epoch": 7.721752498078401, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 3.1994242668151855, | |
| "learning_rate": 5.3984681742462435e-06, | |
| "loss": 0.5339, | |
| "step": 51200 | |
| }, | |
| { | |
| "ar_loss": 0.3728, | |
| "epoch": 7.737125288239816, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 3.1270108222961426, | |
| "learning_rate": 5.323681578384318e-06, | |
| "loss": 0.5312, | |
| "step": 51300 | |
| }, | |
| { | |
| "ar_loss": 0.3753, | |
| "epoch": 7.75249807840123, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 4.753870964050293, | |
| "learning_rate": 5.24935484699744e-06, | |
| "loss": 0.5341, | |
| "step": 51400 | |
| }, | |
| { | |
| "ar_loss": 0.3751, | |
| "epoch": 7.767870868562644, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 2.4398574829101562, | |
| "learning_rate": 5.175489717210532e-06, | |
| "loss": 0.5343, | |
| "step": 51500 | |
| }, | |
| { | |
| "ar_loss": 0.3723, | |
| "epoch": 7.783243658724058, | |
| "fm_loss": 0.1603, | |
| "grad_norm": 1.8831534385681152, | |
| "learning_rate": 5.102087915360229e-06, | |
| "loss": 0.5326, | |
| "step": 51600 | |
| }, | |
| { | |
| "ar_loss": 0.3731, | |
| "epoch": 7.798616448885473, | |
| "fm_loss": 0.1579, | |
| "grad_norm": 3.3795456886291504, | |
| "learning_rate": 5.0291511569544955e-06, | |
| "loss": 0.5311, | |
| "step": 51700 | |
| }, | |
| { | |
| "ar_loss": 0.3724, | |
| "epoch": 7.813989239046887, | |
| "fm_loss": 0.159, | |
| "grad_norm": 6.143826007843018, | |
| "learning_rate": 4.956681146632553e-06, | |
| "loss": 0.5314, | |
| "step": 51800 | |
| }, | |
| { | |
| "ar_loss": 0.3737, | |
| "epoch": 7.829362029208301, | |
| "fm_loss": 0.1575, | |
| "grad_norm": 3.264896869659424, | |
| "learning_rate": 4.884679578125029e-06, | |
| "loss": 0.5312, | |
| "step": 51900 | |
| }, | |
| { | |
| "ar_loss": 0.3744, | |
| "epoch": 7.844734819369716, | |
| "fm_loss": 0.1608, | |
| "grad_norm": 2.4835152626037598, | |
| "learning_rate": 4.813148134214396e-06, | |
| "loss": 0.5352, | |
| "step": 52000 | |
| }, | |
| { | |
| "ar_loss": 0.3762, | |
| "epoch": 7.86010760953113, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 2.550990581512451, | |
| "learning_rate": 4.742088486695604e-06, | |
| "loss": 0.5343, | |
| "step": 52100 | |
| }, | |
| { | |
| "ar_loss": 0.3731, | |
| "epoch": 7.875480399692544, | |
| "fm_loss": 0.1575, | |
| "grad_norm": 6.491451740264893, | |
| "learning_rate": 4.671502296337033e-06, | |
| "loss": 0.5307, | |
| "step": 52200 | |
| }, | |
| { | |
| "ar_loss": 0.3729, | |
| "epoch": 7.890853189853958, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 3.544994354248047, | |
| "learning_rate": 4.60139121284168e-06, | |
| "loss": 0.5317, | |
| "step": 52300 | |
| }, | |
| { | |
| "ar_loss": 0.3753, | |
| "epoch": 7.906225980015373, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.540804386138916, | |
| "learning_rate": 4.531756874808585e-06, | |
| "loss": 0.535, | |
| "step": 52400 | |
| }, | |
| { | |
| "ar_loss": 0.375, | |
| "epoch": 7.921598770176787, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 3.565279722213745, | |
| "learning_rate": 4.462600909694559e-06, | |
| "loss": 0.5334, | |
| "step": 52500 | |
| }, | |
| { | |
| "ar_loss": 0.3729, | |
| "epoch": 7.936971560338201, | |
| "fm_loss": 0.1593, | |
| "grad_norm": 2.623326539993286, | |
| "learning_rate": 4.393924933776122e-06, | |
| "loss": 0.5321, | |
| "step": 52600 | |
| }, | |
| { | |
| "ar_loss": 0.3763, | |
| "epoch": 7.952344350499615, | |
| "fm_loss": 0.158, | |
| "grad_norm": 3.847337007522583, | |
| "learning_rate": 4.325730552111754e-06, | |
| "loss": 0.5343, | |
| "step": 52700 | |
| }, | |
| { | |
| "ar_loss": 0.3734, | |
| "epoch": 7.96771714066103, | |
| "fm_loss": 0.1577, | |
| "grad_norm": 2.1040573120117188, | |
| "learning_rate": 4.258019358504359e-06, | |
| "loss": 0.5312, | |
| "step": 52800 | |
| }, | |
| { | |
| "ar_loss": 0.3727, | |
| "epoch": 7.983089930822445, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 2.5626471042633057, | |
| "learning_rate": 4.190792935464033e-06, | |
| "loss": 0.5311, | |
| "step": 52900 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 7.998462720983858, | |
| "fm_loss": 0.16, | |
| "grad_norm": 2.844808340072632, | |
| "learning_rate": 4.124052854171068e-06, | |
| "loss": 0.5335, | |
| "step": 53000 | |
| }, | |
| { | |
| "ar_loss": 0.3713, | |
| "epoch": 8.013835511145272, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 3.6812970638275146, | |
| "learning_rate": 4.057800674439227e-06, | |
| "loss": 0.5302, | |
| "step": 53100 | |
| }, | |
| { | |
| "ar_loss": 0.3762, | |
| "epoch": 8.029208301306687, | |
| "fm_loss": 0.158, | |
| "grad_norm": 3.114596128463745, | |
| "learning_rate": 3.992037944679322e-06, | |
| "loss": 0.5342, | |
| "step": 53200 | |
| }, | |
| { | |
| "ar_loss": 0.3731, | |
| "epoch": 8.044581091468102, | |
| "fm_loss": 0.1582, | |
| "grad_norm": 7.761219501495361, | |
| "learning_rate": 3.926766201862972e-06, | |
| "loss": 0.5313, | |
| "step": 53300 | |
| }, | |
| { | |
| "ar_loss": 0.3736, | |
| "epoch": 8.059953881629516, | |
| "fm_loss": 0.1572, | |
| "grad_norm": 2.6152584552764893, | |
| "learning_rate": 3.861986971486725e-06, | |
| "loss": 0.5307, | |
| "step": 53400 | |
| }, | |
| { | |
| "ar_loss": 0.3762, | |
| "epoch": 8.07532667179093, | |
| "fm_loss": 0.1578, | |
| "grad_norm": 10.676106452941895, | |
| "learning_rate": 3.7977017675363826e-06, | |
| "loss": 0.5339, | |
| "step": 53500 | |
| }, | |
| { | |
| "ar_loss": 0.3778, | |
| "epoch": 8.090699461952344, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 2.382251024246216, | |
| "learning_rate": 3.7339120924516276e-06, | |
| "loss": 0.5359, | |
| "step": 53600 | |
| }, | |
| { | |
| "ar_loss": 0.3755, | |
| "epoch": 8.106072252113758, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 2.832390308380127, | |
| "learning_rate": 3.6706194370909025e-06, | |
| "loss": 0.5341, | |
| "step": 53700 | |
| }, | |
| { | |
| "ar_loss": 0.3725, | |
| "epoch": 8.121445042275173, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 2.2325046062469482, | |
| "learning_rate": 3.6078252806965667e-06, | |
| "loss": 0.531, | |
| "step": 53800 | |
| }, | |
| { | |
| "ar_loss": 0.3749, | |
| "epoch": 8.136817832436588, | |
| "fm_loss": 0.1579, | |
| "grad_norm": 2.05092716217041, | |
| "learning_rate": 3.5455310908603293e-06, | |
| "loss": 0.5328, | |
| "step": 53900 | |
| }, | |
| { | |
| "ar_loss": 0.3705, | |
| "epoch": 8.152190622598, | |
| "fm_loss": 0.1623, | |
| "grad_norm": 2.234282970428467, | |
| "learning_rate": 3.4837383234889498e-06, | |
| "loss": 0.5328, | |
| "step": 54000 | |
| }, | |
| { | |
| "ar_loss": 0.3727, | |
| "epoch": 8.167563412759415, | |
| "fm_loss": 0.1582, | |
| "grad_norm": 2.087803363800049, | |
| "learning_rate": 3.422448422770197e-06, | |
| "loss": 0.5309, | |
| "step": 54100 | |
| }, | |
| { | |
| "ar_loss": 0.3737, | |
| "epoch": 8.18293620292083, | |
| "fm_loss": 0.1579, | |
| "grad_norm": 2.0325510501861572, | |
| "learning_rate": 3.3616628211391193e-06, | |
| "loss": 0.5316, | |
| "step": 54200 | |
| }, | |
| { | |
| "ar_loss": 0.3734, | |
| "epoch": 8.198308993082245, | |
| "fm_loss": 0.1578, | |
| "grad_norm": 4.768230438232422, | |
| "learning_rate": 3.3013829392445434e-06, | |
| "loss": 0.5312, | |
| "step": 54300 | |
| }, | |
| { | |
| "ar_loss": 0.371, | |
| "epoch": 8.21368178324366, | |
| "fm_loss": 0.1578, | |
| "grad_norm": 9.634552955627441, | |
| "learning_rate": 3.2416101859158887e-06, | |
| "loss": 0.5289, | |
| "step": 54400 | |
| }, | |
| { | |
| "ar_loss": 0.3733, | |
| "epoch": 8.229054573405072, | |
| "fm_loss": 0.16, | |
| "grad_norm": 2.277290105819702, | |
| "learning_rate": 3.1823459581302394e-06, | |
| "loss": 0.5333, | |
| "step": 54500 | |
| }, | |
| { | |
| "ar_loss": 0.3716, | |
| "epoch": 8.244427363566487, | |
| "fm_loss": 0.158, | |
| "grad_norm": 6.1840996742248535, | |
| "learning_rate": 3.123591640979681e-06, | |
| "loss": 0.5296, | |
| "step": 54600 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 8.259800153727902, | |
| "fm_loss": 0.1592, | |
| "grad_norm": 3.6673200130462646, | |
| "learning_rate": 3.065348607638946e-06, | |
| "loss": 0.5327, | |
| "step": 54700 | |
| }, | |
| { | |
| "ar_loss": 0.3722, | |
| "epoch": 8.275172943889316, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 2.28403902053833, | |
| "learning_rate": 3.0076182193333053e-06, | |
| "loss": 0.5306, | |
| "step": 54800 | |
| }, | |
| { | |
| "ar_loss": 0.3706, | |
| "epoch": 8.29054573405073, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 2.709955930709839, | |
| "learning_rate": 2.9504018253067673e-06, | |
| "loss": 0.5294, | |
| "step": 54900 | |
| }, | |
| { | |
| "ar_loss": 0.3732, | |
| "epoch": 8.305918524212144, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 2.438748359680176, | |
| "learning_rate": 2.8937007627905354e-06, | |
| "loss": 0.5314, | |
| "step": 55000 | |
| }, | |
| { | |
| "ar_loss": 0.3735, | |
| "epoch": 8.321291314373559, | |
| "fm_loss": 0.1589, | |
| "grad_norm": 1.9475317001342773, | |
| "learning_rate": 2.8375163569717645e-06, | |
| "loss": 0.5324, | |
| "step": 55100 | |
| }, | |
| { | |
| "ar_loss": 0.375, | |
| "epoch": 8.336664104534973, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 2.430262804031372, | |
| "learning_rate": 2.781849920962576e-06, | |
| "loss": 0.5337, | |
| "step": 55200 | |
| }, | |
| { | |
| "ar_loss": 0.3742, | |
| "epoch": 8.352036894696388, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 3.527625799179077, | |
| "learning_rate": 2.726702755769381e-06, | |
| "loss": 0.5329, | |
| "step": 55300 | |
| }, | |
| { | |
| "ar_loss": 0.3744, | |
| "epoch": 8.367409684857801, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 2.9451231956481934, | |
| "learning_rate": 2.6720761502624674e-06, | |
| "loss": 0.5328, | |
| "step": 55400 | |
| }, | |
| { | |
| "ar_loss": 0.3716, | |
| "epoch": 8.382782475019216, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 4.992424488067627, | |
| "learning_rate": 2.6179713811458726e-06, | |
| "loss": 0.5315, | |
| "step": 55500 | |
| }, | |
| { | |
| "ar_loss": 0.3727, | |
| "epoch": 8.39815526518063, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 4.794875144958496, | |
| "learning_rate": 2.564389712927556e-06, | |
| "loss": 0.5313, | |
| "step": 55600 | |
| }, | |
| { | |
| "ar_loss": 0.3728, | |
| "epoch": 8.413528055342045, | |
| "fm_loss": 0.1608, | |
| "grad_norm": 7.288453102111816, | |
| "learning_rate": 2.5113323978898455e-06, | |
| "loss": 0.5336, | |
| "step": 55700 | |
| }, | |
| { | |
| "ar_loss": 0.3732, | |
| "epoch": 8.42890084550346, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 2.7333781719207764, | |
| "learning_rate": 2.458800676060158e-06, | |
| "loss": 0.5316, | |
| "step": 55800 | |
| }, | |
| { | |
| "ar_loss": 0.3737, | |
| "epoch": 8.444273635664873, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 2.3420047760009766, | |
| "learning_rate": 2.406795775182025e-06, | |
| "loss": 0.532, | |
| "step": 55900 | |
| }, | |
| { | |
| "ar_loss": 0.3742, | |
| "epoch": 8.459646425826287, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 2.3550283908843994, | |
| "learning_rate": 2.355318910686394e-06, | |
| "loss": 0.5325, | |
| "step": 56000 | |
| }, | |
| { | |
| "ar_loss": 0.3755, | |
| "epoch": 8.475019215987702, | |
| "fm_loss": 0.1609, | |
| "grad_norm": 2.22489595413208, | |
| "learning_rate": 2.304371285663237e-06, | |
| "loss": 0.5364, | |
| "step": 56100 | |
| }, | |
| { | |
| "ar_loss": 0.3723, | |
| "epoch": 8.490392006149117, | |
| "fm_loss": 0.1578, | |
| "grad_norm": 12.48835563659668, | |
| "learning_rate": 2.2539540908334157e-06, | |
| "loss": 0.5301, | |
| "step": 56200 | |
| }, | |
| { | |
| "ar_loss": 0.3729, | |
| "epoch": 8.50576479631053, | |
| "fm_loss": 0.159, | |
| "grad_norm": 2.215409994125366, | |
| "learning_rate": 2.204068504520859e-06, | |
| "loss": 0.5319, | |
| "step": 56300 | |
| }, | |
| { | |
| "ar_loss": 0.3739, | |
| "epoch": 8.521137586471944, | |
| "fm_loss": 0.1578, | |
| "grad_norm": 3.4783434867858887, | |
| "learning_rate": 2.15471569262502e-06, | |
| "loss": 0.5317, | |
| "step": 56400 | |
| }, | |
| { | |
| "ar_loss": 0.3721, | |
| "epoch": 8.536510376633359, | |
| "fm_loss": 0.1576, | |
| "grad_norm": 2.0251822471618652, | |
| "learning_rate": 2.1058968085936383e-06, | |
| "loss": 0.5297, | |
| "step": 56500 | |
| }, | |
| { | |
| "ar_loss": 0.374, | |
| "epoch": 8.551883166794774, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 11.012389183044434, | |
| "learning_rate": 2.0576129933957715e-06, | |
| "loss": 0.5323, | |
| "step": 56600 | |
| }, | |
| { | |
| "ar_loss": 0.373, | |
| "epoch": 8.567255956956188, | |
| "fm_loss": 0.1579, | |
| "grad_norm": 2.4049017429351807, | |
| "learning_rate": 2.009865375495129e-06, | |
| "loss": 0.531, | |
| "step": 56700 | |
| }, | |
| { | |
| "ar_loss": 0.3748, | |
| "epoch": 8.582628747117601, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 2.692699909210205, | |
| "learning_rate": 1.9626550708237075e-06, | |
| "loss": 0.5339, | |
| "step": 56800 | |
| }, | |
| { | |
| "ar_loss": 0.3729, | |
| "epoch": 8.598001537279016, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 2.713548183441162, | |
| "learning_rate": 1.915983182755696e-06, | |
| "loss": 0.5315, | |
| "step": 56900 | |
| }, | |
| { | |
| "ar_loss": 0.3711, | |
| "epoch": 8.61337432744043, | |
| "fm_loss": 0.1575, | |
| "grad_norm": 6.727135181427002, | |
| "learning_rate": 1.8698508020817045e-06, | |
| "loss": 0.5286, | |
| "step": 57000 | |
| }, | |
| { | |
| "ar_loss": 0.3742, | |
| "epoch": 8.628747117601845, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 3.071518659591675, | |
| "learning_rate": 1.8242590069832617e-06, | |
| "loss": 0.5325, | |
| "step": 57100 | |
| }, | |
| { | |
| "ar_loss": 0.3712, | |
| "epoch": 8.644119907763258, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 2.538884401321411, | |
| "learning_rate": 1.7792088630076086e-06, | |
| "loss": 0.5296, | |
| "step": 57200 | |
| }, | |
| { | |
| "ar_loss": 0.374, | |
| "epoch": 8.659492697924673, | |
| "fm_loss": 0.1583, | |
| "grad_norm": 2.6885924339294434, | |
| "learning_rate": 1.7347014230428144e-06, | |
| "loss": 0.5323, | |
| "step": 57300 | |
| }, | |
| { | |
| "ar_loss": 0.3721, | |
| "epoch": 8.674865488086088, | |
| "fm_loss": 0.1579, | |
| "grad_norm": 2.660156726837158, | |
| "learning_rate": 1.6907377272931485e-06, | |
| "loss": 0.53, | |
| "step": 57400 | |
| }, | |
| { | |
| "ar_loss": 0.3725, | |
| "epoch": 8.690238278247502, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 2.4190871715545654, | |
| "learning_rate": 1.6473188032547854e-06, | |
| "loss": 0.5311, | |
| "step": 57500 | |
| }, | |
| { | |
| "ar_loss": 0.3711, | |
| "epoch": 8.705611068408917, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 3.2725918292999268, | |
| "learning_rate": 1.6044456656917839e-06, | |
| "loss": 0.5291, | |
| "step": 57600 | |
| }, | |
| { | |
| "ar_loss": 0.3725, | |
| "epoch": 8.72098385857033, | |
| "fm_loss": 0.1632, | |
| "grad_norm": 4.782108783721924, | |
| "learning_rate": 1.5621193166123648e-06, | |
| "loss": 0.5357, | |
| "step": 57700 | |
| }, | |
| { | |
| "ar_loss": 0.3747, | |
| "epoch": 8.736356648731745, | |
| "fm_loss": 0.158, | |
| "grad_norm": 7.434814929962158, | |
| "learning_rate": 1.5203407452455076e-06, | |
| "loss": 0.5327, | |
| "step": 57800 | |
| }, | |
| { | |
| "ar_loss": 0.3704, | |
| "epoch": 8.75172943889316, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 2.1383039951324463, | |
| "learning_rate": 1.4791109280178129e-06, | |
| "loss": 0.5285, | |
| "step": 57900 | |
| }, | |
| { | |
| "ar_loss": 0.3772, | |
| "epoch": 8.767102229054574, | |
| "fm_loss": 0.158, | |
| "grad_norm": 26.415754318237305, | |
| "learning_rate": 1.4384308285306959e-06, | |
| "loss": 0.5352, | |
| "step": 58000 | |
| }, | |
| { | |
| "ar_loss": 0.3721, | |
| "epoch": 8.782475019215987, | |
| "fm_loss": 0.1575, | |
| "grad_norm": 8.601106643676758, | |
| "learning_rate": 1.3983013975378574e-06, | |
| "loss": 0.5296, | |
| "step": 58100 | |
| }, | |
| { | |
| "ar_loss": 0.372, | |
| "epoch": 8.797847809377402, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 2.2945618629455566, | |
| "learning_rate": 1.3587235729230764e-06, | |
| "loss": 0.5303, | |
| "step": 58200 | |
| }, | |
| { | |
| "ar_loss": 0.3747, | |
| "epoch": 8.813220599538816, | |
| "fm_loss": 0.1584, | |
| "grad_norm": 4.096945762634277, | |
| "learning_rate": 1.3196982796782636e-06, | |
| "loss": 0.5331, | |
| "step": 58300 | |
| }, | |
| { | |
| "ar_loss": 0.3752, | |
| "epoch": 8.82859338970023, | |
| "fm_loss": 0.1599, | |
| "grad_norm": 3.189495801925659, | |
| "learning_rate": 1.2812264298818737e-06, | |
| "loss": 0.5351, | |
| "step": 58400 | |
| }, | |
| { | |
| "ar_loss": 0.3725, | |
| "epoch": 8.843966179861646, | |
| "fm_loss": 0.1587, | |
| "grad_norm": 3.499967098236084, | |
| "learning_rate": 1.2433089226775662e-06, | |
| "loss": 0.5312, | |
| "step": 58500 | |
| }, | |
| { | |
| "ar_loss": 0.3716, | |
| "epoch": 8.859338970023058, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 2.2825992107391357, | |
| "learning_rate": 1.2059466442532004e-06, | |
| "loss": 0.5307, | |
| "step": 58600 | |
| }, | |
| { | |
| "ar_loss": 0.372, | |
| "epoch": 8.874711760184473, | |
| "fm_loss": 0.158, | |
| "grad_norm": 3.344238042831421, | |
| "learning_rate": 1.1691404678201317e-06, | |
| "loss": 0.53, | |
| "step": 58700 | |
| }, | |
| { | |
| "ar_loss": 0.372, | |
| "epoch": 8.890084550345888, | |
| "fm_loss": 0.1588, | |
| "grad_norm": 3.1672725677490234, | |
| "learning_rate": 1.1328912535927828e-06, | |
| "loss": 0.5308, | |
| "step": 58800 | |
| }, | |
| { | |
| "ar_loss": 0.3724, | |
| "epoch": 8.905457340507303, | |
| "fm_loss": 0.1581, | |
| "grad_norm": 19.84259605407715, | |
| "learning_rate": 1.0971998487685597e-06, | |
| "loss": 0.5304, | |
| "step": 58900 | |
| }, | |
| { | |
| "ar_loss": 0.3746, | |
| "epoch": 8.920830130668715, | |
| "fm_loss": 0.1598, | |
| "grad_norm": 3.0031962394714355, | |
| "learning_rate": 1.0620670875080397e-06, | |
| "loss": 0.5344, | |
| "step": 59000 | |
| }, | |
| { | |
| "ar_loss": 0.3723, | |
| "epoch": 8.93620292083013, | |
| "fm_loss": 0.159, | |
| "grad_norm": 6.419062614440918, | |
| "learning_rate": 1.0274937909154792e-06, | |
| "loss": 0.5314, | |
| "step": 59100 | |
| }, | |
| { | |
| "ar_loss": 0.3777, | |
| "epoch": 8.951575710991545, | |
| "fm_loss": 0.1597, | |
| "grad_norm": 2.2336511611938477, | |
| "learning_rate": 9.934807670196223e-07, | |
| "loss": 0.5373, | |
| "step": 59200 | |
| }, | |
| { | |
| "ar_loss": 0.3717, | |
| "epoch": 8.96694850115296, | |
| "fm_loss": 0.1585, | |
| "grad_norm": 1.7860270738601685, | |
| "learning_rate": 9.600288107548233e-07, | |
| "loss": 0.5302, | |
| "step": 59300 | |
| }, | |
| { | |
| "ar_loss": 0.3732, | |
| "epoch": 8.982321291314374, | |
| "fm_loss": 0.1591, | |
| "grad_norm": 2.5118651390075684, | |
| "learning_rate": 9.271387039424456e-07, | |
| "loss": 0.5322, | |
| "step": 59400 | |
| }, | |
| { | |
| "ar_loss": 0.3723, | |
| "epoch": 8.997694081475787, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 2.007235288619995, | |
| "learning_rate": 8.948112152726285e-07, | |
| "loss": 0.5308, | |
| "step": 59500 | |
| }, | |
| { | |
| "ar_loss": 0.3726, | |
| "epoch": 9.013066871637202, | |
| "fm_loss": 0.1579, | |
| "grad_norm": 3.087045907974243, | |
| "learning_rate": 8.630471002862795e-07, | |
| "loss": 0.5305, | |
| "step": 59600 | |
| }, | |
| { | |
| "ar_loss": 0.373, | |
| "epoch": 9.028439661798616, | |
| "fm_loss": 0.1594, | |
| "grad_norm": 2.6808998584747314, | |
| "learning_rate": 8.318471013574442e-07, | |
| "loss": 0.5324, | |
| "step": 59700 | |
| }, | |
| { | |
| "ar_loss": 0.3725, | |
| "epoch": 9.043812451960031, | |
| "fm_loss": 0.1586, | |
| "grad_norm": 2.7164993286132812, | |
| "learning_rate": 8.01211947675945e-07, | |
| "loss": 0.5311, | |
| "step": 59800 | |
| }, | |
| { | |
| "ar_loss": 0.3736, | |
| "epoch": 9.059185242121446, | |
| "fm_loss": 0.1582, | |
| "grad_norm": 3.879828929901123, | |
| "learning_rate": 7.711423552303366e-07, | |
| "loss": 0.5318, | |
| "step": 59900 | |
| }, | |
| { | |
| "ar_loss": 0.3727, | |
| "epoch": 9.074558032282859, | |
| "fm_loss": 0.1574, | |
| "grad_norm": 2.1200876235961914, | |
| "learning_rate": 7.416390267911827e-07, | |
| "loss": 0.5301, | |
| "step": 60000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 65050, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.656055098925268e+20, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |