| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 7299, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.20550760378133992, | |
| "grad_norm": 0.022213317453861237, | |
| "learning_rate": 1.8629949308124403e-05, | |
| "loss": 0.1893, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20550760378133992, | |
| "eval_accuracy": 0.9274409044193217, | |
| "eval_loss": 0.384206622838974, | |
| "eval_runtime": 66.4904, | |
| "eval_samples_per_second": 73.168, | |
| "eval_steps_per_second": 9.159, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.41101520756267984, | |
| "grad_norm": 0.016410009935498238, | |
| "learning_rate": 1.7259898616248804e-05, | |
| "loss": 0.0675, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.41101520756267984, | |
| "eval_accuracy": 0.994655704008222, | |
| "eval_loss": 0.028500495478510857, | |
| "eval_runtime": 66.4366, | |
| "eval_samples_per_second": 73.228, | |
| "eval_steps_per_second": 9.167, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6165228113440198, | |
| "grad_norm": 0.013172637671232224, | |
| "learning_rate": 1.58898479243732e-05, | |
| "loss": 0.0519, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6165228113440198, | |
| "eval_accuracy": 0.9852004110996917, | |
| "eval_loss": 0.06652244180440903, | |
| "eval_runtime": 37.1596, | |
| "eval_samples_per_second": 130.922, | |
| "eval_steps_per_second": 16.389, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8220304151253597, | |
| "grad_norm": 93.64683532714844, | |
| "learning_rate": 1.4519797232497603e-05, | |
| "loss": 0.0287, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8220304151253597, | |
| "eval_accuracy": 0.9891058581706064, | |
| "eval_loss": 0.05017192289233208, | |
| "eval_runtime": 31.6255, | |
| "eval_samples_per_second": 153.832, | |
| "eval_steps_per_second": 19.257, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0275380189066996, | |
| "grad_norm": 0.004394204821437597, | |
| "learning_rate": 1.3149746540622004e-05, | |
| "loss": 0.0334, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.0275380189066996, | |
| "eval_accuracy": 0.9905447070914697, | |
| "eval_loss": 0.04894111305475235, | |
| "eval_runtime": 47.4094, | |
| "eval_samples_per_second": 102.617, | |
| "eval_steps_per_second": 12.846, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.2330456226880395, | |
| "grad_norm": 0.0032397848553955555, | |
| "learning_rate": 1.1779695848746405e-05, | |
| "loss": 0.0218, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2330456226880395, | |
| "eval_accuracy": 0.9819116135662899, | |
| "eval_loss": 0.10269968956708908, | |
| "eval_runtime": 45.531, | |
| "eval_samples_per_second": 106.85, | |
| "eval_steps_per_second": 13.375, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.4385532264693794, | |
| "grad_norm": 0.0003408812917768955, | |
| "learning_rate": 1.0409645156870804e-05, | |
| "loss": 0.0102, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.4385532264693794, | |
| "eval_accuracy": 0.9944501541623844, | |
| "eval_loss": 0.04378344491124153, | |
| "eval_runtime": 45.7881, | |
| "eval_samples_per_second": 106.25, | |
| "eval_steps_per_second": 13.3, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.6440608302507194, | |
| "grad_norm": 0.0026256833225488663, | |
| "learning_rate": 9.039594464995205e-06, | |
| "loss": 0.0038, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6440608302507194, | |
| "eval_accuracy": 0.9704008221993834, | |
| "eval_loss": 0.19674982130527496, | |
| "eval_runtime": 45.5104, | |
| "eval_samples_per_second": 106.899, | |
| "eval_steps_per_second": 13.382, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.8495684340320593, | |
| "grad_norm": 0.0002484402502886951, | |
| "learning_rate": 7.669543773119606e-06, | |
| "loss": 0.012, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.8495684340320593, | |
| "eval_accuracy": 0.9852004110996917, | |
| "eval_loss": 0.07537884265184402, | |
| "eval_runtime": 45.1736, | |
| "eval_samples_per_second": 107.696, | |
| "eval_steps_per_second": 13.481, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.055076037813399, | |
| "grad_norm": 0.0006908943178132176, | |
| "learning_rate": 6.299493081244007e-06, | |
| "loss": 0.0061, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.055076037813399, | |
| "eval_accuracy": 0.9730729701952724, | |
| "eval_loss": 0.166794553399086, | |
| "eval_runtime": 45.1304, | |
| "eval_samples_per_second": 107.799, | |
| "eval_steps_per_second": 13.494, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.260583641594739, | |
| "grad_norm": 0.00021314685000106692, | |
| "learning_rate": 4.929442389368407e-06, | |
| "loss": 0.003, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.260583641594739, | |
| "eval_accuracy": 0.9954779033915725, | |
| "eval_loss": 0.03057803027331829, | |
| "eval_runtime": 45.2961, | |
| "eval_samples_per_second": 107.404, | |
| "eval_steps_per_second": 13.445, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.466091245376079, | |
| "grad_norm": 0.00010584539995761588, | |
| "learning_rate": 3.5593916974928076e-06, | |
| "loss": 0.0, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.466091245376079, | |
| "eval_accuracy": 0.9926002055498458, | |
| "eval_loss": 0.05110383406281471, | |
| "eval_runtime": 45.54, | |
| "eval_samples_per_second": 106.829, | |
| "eval_steps_per_second": 13.373, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.671598849157419, | |
| "grad_norm": 9.755617793416604e-05, | |
| "learning_rate": 2.189341005617208e-06, | |
| "loss": 0.0, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.671598849157419, | |
| "eval_accuracy": 0.9921891058581707, | |
| "eval_loss": 0.05986848846077919, | |
| "eval_runtime": 45.0163, | |
| "eval_samples_per_second": 108.072, | |
| "eval_steps_per_second": 13.528, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.877106452938759, | |
| "grad_norm": 0.00011223769251955673, | |
| "learning_rate": 8.192903137416085e-07, | |
| "loss": 0.0003, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.877106452938759, | |
| "eval_accuracy": 0.9868448098663926, | |
| "eval_loss": 0.09568490833044052, | |
| "eval_runtime": 45.1871, | |
| "eval_samples_per_second": 107.663, | |
| "eval_steps_per_second": 13.477, | |
| "step": 7000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 7299, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 1.195019939840466e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |