| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 1071, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.032679738562091505, | |
| "grad_norm": 11.060789182905086, | |
| "learning_rate": 1.4814814814814815e-06, | |
| "loss": 0.8827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8205808401107788, | |
| "step": 5, | |
| "valid_targets_mean": 779.9, | |
| "valid_targets_min": 250 | |
| }, | |
| { | |
| "epoch": 0.06535947712418301, | |
| "grad_norm": 6.973307415232975, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.8066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8751524686813354, | |
| "step": 10, | |
| "valid_targets_mean": 1240.4, | |
| "valid_targets_min": 217 | |
| }, | |
| { | |
| "epoch": 0.09803921568627451, | |
| "grad_norm": 4.5603144559339395, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 0.7469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7398011684417725, | |
| "step": 15, | |
| "valid_targets_mean": 669.1, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 0.13071895424836602, | |
| "grad_norm": 3.5517010420743618, | |
| "learning_rate": 7.0370370370370375e-06, | |
| "loss": 0.6986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8484277129173279, | |
| "step": 20, | |
| "valid_targets_mean": 701.3, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 0.16339869281045752, | |
| "grad_norm": 1.71312096923623, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.7292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7061011791229248, | |
| "step": 25, | |
| "valid_targets_mean": 1292.7, | |
| "valid_targets_min": 226 | |
| }, | |
| { | |
| "epoch": 0.19607843137254902, | |
| "grad_norm": 1.1836260079640273, | |
| "learning_rate": 1.0740740740740742e-05, | |
| "loss": 0.6299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5523278713226318, | |
| "step": 30, | |
| "valid_targets_mean": 1475.2, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 0.22875816993464052, | |
| "grad_norm": 0.7908890063519861, | |
| "learning_rate": 1.2592592592592593e-05, | |
| "loss": 0.4857, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3864622116088867, | |
| "step": 35, | |
| "valid_targets_mean": 2134.2, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 0.26143790849673204, | |
| "grad_norm": 1.3350691541627218, | |
| "learning_rate": 1.4444444444444446e-05, | |
| "loss": 0.5807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4980165958404541, | |
| "step": 40, | |
| "valid_targets_mean": 929.5, | |
| "valid_targets_min": 296 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 1.3513898058061415, | |
| "learning_rate": 1.6296296296296297e-05, | |
| "loss": 0.6403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6828616857528687, | |
| "step": 45, | |
| "valid_targets_mean": 1063.6, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 0.32679738562091504, | |
| "grad_norm": 1.0371862834617929, | |
| "learning_rate": 1.814814814814815e-05, | |
| "loss": 0.5481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4910392761230469, | |
| "step": 50, | |
| "valid_targets_mean": 1120.2, | |
| "valid_targets_min": 268 | |
| }, | |
| { | |
| "epoch": 0.35947712418300654, | |
| "grad_norm": 1.280369114672039, | |
| "learning_rate": 2e-05, | |
| "loss": 0.475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5395255088806152, | |
| "step": 55, | |
| "valid_targets_mean": 998.9, | |
| "valid_targets_min": 242 | |
| }, | |
| { | |
| "epoch": 0.39215686274509803, | |
| "grad_norm": 1.117311206993178, | |
| "learning_rate": 2.1851851851851852e-05, | |
| "loss": 0.5648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5647487640380859, | |
| "step": 60, | |
| "valid_targets_mean": 1208.2, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 0.42483660130718953, | |
| "grad_norm": 1.1653791876480772, | |
| "learning_rate": 2.3703703703703703e-05, | |
| "loss": 0.5476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5218506455421448, | |
| "step": 65, | |
| "valid_targets_mean": 1001.4, | |
| "valid_targets_min": 280 | |
| }, | |
| { | |
| "epoch": 0.45751633986928103, | |
| "grad_norm": 1.174967214115349, | |
| "learning_rate": 2.5555555555555554e-05, | |
| "loss": 0.5201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47852823138237, | |
| "step": 70, | |
| "valid_targets_mean": 1043.4, | |
| "valid_targets_min": 243 | |
| }, | |
| { | |
| "epoch": 0.49019607843137253, | |
| "grad_norm": 1.6604928495972278, | |
| "learning_rate": 2.740740740740741e-05, | |
| "loss": 0.5551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5851165056228638, | |
| "step": 75, | |
| "valid_targets_mean": 576.4, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 0.5228758169934641, | |
| "grad_norm": 1.0202504936662373, | |
| "learning_rate": 2.9259259259259262e-05, | |
| "loss": 0.5125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3803825378417969, | |
| "step": 80, | |
| "valid_targets_mean": 1004.8, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 1.1312120882470664, | |
| "learning_rate": 3.111111111111112e-05, | |
| "loss": 0.5344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5755863189697266, | |
| "step": 85, | |
| "valid_targets_mean": 1221.0, | |
| "valid_targets_min": 243 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 1.4325847007248265, | |
| "learning_rate": 3.2962962962962964e-05, | |
| "loss": 0.5168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6161978244781494, | |
| "step": 90, | |
| "valid_targets_mean": 746.8, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 0.6209150326797386, | |
| "grad_norm": 0.9183077187637829, | |
| "learning_rate": 3.481481481481482e-05, | |
| "loss": 0.5328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45081230998039246, | |
| "step": 95, | |
| "valid_targets_mean": 1345.3, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 0.6535947712418301, | |
| "grad_norm": 1.6429963233048106, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6670611500740051, | |
| "step": 100, | |
| "valid_targets_mean": 619.7, | |
| "valid_targets_min": 270 | |
| }, | |
| { | |
| "epoch": 0.6862745098039216, | |
| "grad_norm": 1.1663889041506792, | |
| "learning_rate": 3.851851851851852e-05, | |
| "loss": 0.4839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4529034495353699, | |
| "step": 105, | |
| "valid_targets_mean": 863.6, | |
| "valid_targets_min": 236 | |
| }, | |
| { | |
| "epoch": 0.7189542483660131, | |
| "grad_norm": 1.2334368369716797, | |
| "learning_rate": 3.9999893574233685e-05, | |
| "loss": 0.5261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4745844304561615, | |
| "step": 110, | |
| "valid_targets_mean": 830.2, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 0.7516339869281046, | |
| "grad_norm": 1.249990193797269, | |
| "learning_rate": 3.9996168791339075e-05, | |
| "loss": 0.5166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4642437696456909, | |
| "step": 115, | |
| "valid_targets_mean": 989.8, | |
| "valid_targets_min": 260 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 1.2281972228406923, | |
| "learning_rate": 3.998712385271904e-05, | |
| "loss": 0.5019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48900026082992554, | |
| "step": 120, | |
| "valid_targets_mean": 832.8, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 0.8169934640522876, | |
| "grad_norm": 1.3239536346835794, | |
| "learning_rate": 3.997276116485867e-05, | |
| "loss": 0.5344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6641391515731812, | |
| "step": 125, | |
| "valid_targets_mean": 930.1, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 0.8496732026143791, | |
| "grad_norm": 1.1579149024229984, | |
| "learning_rate": 3.995308454907679e-05, | |
| "loss": 0.542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5919806957244873, | |
| "step": 130, | |
| "valid_targets_mean": 1064.2, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 1.0457817537071603, | |
| "learning_rate": 3.992809924050924e-05, | |
| "loss": 0.4896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5484431385993958, | |
| "step": 135, | |
| "valid_targets_mean": 1360.4, | |
| "valid_targets_min": 228 | |
| }, | |
| { | |
| "epoch": 0.9150326797385621, | |
| "grad_norm": 1.2511239930549563, | |
| "learning_rate": 3.9897811886716054e-05, | |
| "loss": 0.5363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.610672652721405, | |
| "step": 140, | |
| "valid_targets_mean": 1068.2, | |
| "valid_targets_min": 252 | |
| }, | |
| { | |
| "epoch": 0.9477124183006536, | |
| "grad_norm": 1.2337054029635952, | |
| "learning_rate": 3.986223054591281e-05, | |
| "loss": 0.5173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4866800606250763, | |
| "step": 145, | |
| "valid_targets_mean": 781.0, | |
| "valid_targets_min": 246 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 0.8076936922856879, | |
| "learning_rate": 3.982136468482665e-05, | |
| "loss": 0.4419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35834211111068726, | |
| "step": 150, | |
| "valid_targets_mean": 1508.8, | |
| "valid_targets_min": 264 | |
| }, | |
| { | |
| "epoch": 1.0130718954248366, | |
| "grad_norm": 0.7776333910648919, | |
| "learning_rate": 3.9775225176177595e-05, | |
| "loss": 0.3684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27405640482902527, | |
| "step": 155, | |
| "valid_targets_mean": 1327.5, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 1.0457516339869282, | |
| "grad_norm": 1.1734500557798586, | |
| "learning_rate": 3.972382429578577e-05, | |
| "loss": 0.442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37759023904800415, | |
| "step": 160, | |
| "valid_targets_mean": 763.9, | |
| "valid_targets_min": 244 | |
| }, | |
| { | |
| "epoch": 1.0784313725490196, | |
| "grad_norm": 0.9239963152479674, | |
| "learning_rate": 3.966717571930529e-05, | |
| "loss": 0.4546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3895590901374817, | |
| "step": 165, | |
| "valid_targets_mean": 1424.2, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 0.7807825561166607, | |
| "learning_rate": 3.960529451858575e-05, | |
| "loss": 0.4344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3694984018802643, | |
| "step": 170, | |
| "valid_targets_mean": 1572.8, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 1.1437908496732025, | |
| "grad_norm": 0.7003455825119332, | |
| "learning_rate": 3.9538197157662226e-05, | |
| "loss": 0.4177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2527535557746887, | |
| "step": 175, | |
| "valid_targets_mean": 1521.2, | |
| "valid_targets_min": 217 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 1.7156135500256005, | |
| "learning_rate": 3.946590148837487e-05, | |
| "loss": 0.4335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47262683510780334, | |
| "step": 180, | |
| "valid_targets_mean": 566.9, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 1.2091503267973855, | |
| "grad_norm": 1.008970459747819, | |
| "learning_rate": 3.9388426745619266e-05, | |
| "loss": 0.4002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3759155869483948, | |
| "step": 185, | |
| "valid_targets_mean": 1439.6, | |
| "valid_targets_min": 273 | |
| }, | |
| { | |
| "epoch": 1.2418300653594772, | |
| "grad_norm": 1.327191452209238, | |
| "learning_rate": 3.930579354222883e-05, | |
| "loss": 0.4606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.463574081659317, | |
| "step": 190, | |
| "valid_targets_mean": 751.6, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 1.2745098039215685, | |
| "grad_norm": 1.1345668523426535, | |
| "learning_rate": 3.921802386349057e-05, | |
| "loss": 0.4677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43786492943763733, | |
| "step": 195, | |
| "valid_targets_mean": 980.7, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 1.3071895424836601, | |
| "grad_norm": 1.377027240152856, | |
| "learning_rate": 3.912514106129576e-05, | |
| "loss": 0.458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5057384967803955, | |
| "step": 200, | |
| "valid_targets_mean": 731.2, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 1.3398692810457518, | |
| "grad_norm": 0.8297808392093082, | |
| "learning_rate": 3.902716984792685e-05, | |
| "loss": 0.4572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32464444637298584, | |
| "step": 205, | |
| "valid_targets_mean": 1297.6, | |
| "valid_targets_min": 245 | |
| }, | |
| { | |
| "epoch": 1.3725490196078431, | |
| "grad_norm": 0.982008610679383, | |
| "learning_rate": 3.8924136289482686e-05, | |
| "loss": 0.4438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49354782700538635, | |
| "step": 210, | |
| "valid_targets_mean": 1354.9, | |
| "valid_targets_min": 240 | |
| }, | |
| { | |
| "epoch": 1.4052287581699345, | |
| "grad_norm": 0.9583843270801939, | |
| "learning_rate": 3.881606779894329e-05, | |
| "loss": 0.476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4370919466018677, | |
| "step": 215, | |
| "valid_targets_mean": 1446.8, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 1.4379084967320261, | |
| "grad_norm": 0.981958627933756, | |
| "learning_rate": 3.8702993128876455e-05, | |
| "loss": 0.4424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4009607434272766, | |
| "step": 220, | |
| "valid_targets_mean": 1175.4, | |
| "valid_targets_min": 244 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 1.0318349122572381, | |
| "learning_rate": 3.858494236378785e-05, | |
| "loss": 0.4517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34431201219558716, | |
| "step": 225, | |
| "valid_targets_mean": 848.2, | |
| "valid_targets_min": 266 | |
| }, | |
| { | |
| "epoch": 1.5032679738562091, | |
| "grad_norm": 1.1926829814685804, | |
| "learning_rate": 3.846194691211678e-05, | |
| "loss": 0.4507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5188771486282349, | |
| "step": 230, | |
| "valid_targets_mean": 975.6, | |
| "valid_targets_min": 265 | |
| }, | |
| { | |
| "epoch": 1.5359477124183005, | |
| "grad_norm": 1.3287118805218745, | |
| "learning_rate": 3.8334039497879694e-05, | |
| "loss": 0.4525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43521052598953247, | |
| "step": 235, | |
| "valid_targets_mean": 755.8, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 1.5686274509803921, | |
| "grad_norm": 1.0912341313805085, | |
| "learning_rate": 3.8201254151963664e-05, | |
| "loss": 0.4507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4125358462333679, | |
| "step": 240, | |
| "valid_targets_mean": 935.4, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 1.6013071895424837, | |
| "grad_norm": 1.337631920998683, | |
| "learning_rate": 3.8063626203072196e-05, | |
| "loss": 0.4416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4946654736995697, | |
| "step": 245, | |
| "valid_targets_mean": 760.1, | |
| "valid_targets_min": 229 | |
| }, | |
| { | |
| "epoch": 1.6339869281045751, | |
| "grad_norm": 0.7868890760626824, | |
| "learning_rate": 3.792119226832569e-05, | |
| "loss": 0.4301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3513905704021454, | |
| "step": 250, | |
| "valid_targets_mean": 1832.1, | |
| "valid_targets_min": 302 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 1.1418891712589625, | |
| "learning_rate": 3.7773990243519154e-05, | |
| "loss": 0.4653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43997740745544434, | |
| "step": 255, | |
| "valid_targets_mean": 1025.1, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 1.6993464052287581, | |
| "grad_norm": 1.1982818361901046, | |
| "learning_rate": 3.762205929303969e-05, | |
| "loss": 0.443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.523041844367981, | |
| "step": 260, | |
| "valid_targets_mean": 893.4, | |
| "valid_targets_min": 243 | |
| }, | |
| { | |
| "epoch": 1.7320261437908497, | |
| "grad_norm": 1.4756814409685264, | |
| "learning_rate": 3.746543983944646e-05, | |
| "loss": 0.4581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4994322657585144, | |
| "step": 265, | |
| "valid_targets_mean": 681.1, | |
| "valid_targets_min": 268 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 1.2251966207419902, | |
| "learning_rate": 3.730417355271593e-05, | |
| "loss": 0.4599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4771292209625244, | |
| "step": 270, | |
| "valid_targets_mean": 823.2, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 1.7973856209150327, | |
| "grad_norm": 0.8560952770470027, | |
| "learning_rate": 3.713830333915517e-05, | |
| "loss": 0.4125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3659961223602295, | |
| "step": 275, | |
| "valid_targets_mean": 1743.6, | |
| "valid_targets_min": 277 | |
| }, | |
| { | |
| "epoch": 1.8300653594771243, | |
| "grad_norm": 1.2640409291326877, | |
| "learning_rate": 3.6967873329986305e-05, | |
| "loss": 0.3786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40237635374069214, | |
| "step": 280, | |
| "valid_targets_mean": 707.1, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 1.8627450980392157, | |
| "grad_norm": 1.6042954967809109, | |
| "learning_rate": 3.679292886960497e-05, | |
| "loss": 0.436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47581565380096436, | |
| "step": 285, | |
| "valid_targets_mean": 964.8, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 1.8954248366013071, | |
| "grad_norm": 1.5217135823702368, | |
| "learning_rate": 3.661351650351608e-05, | |
| "loss": 0.4431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4720376431941986, | |
| "step": 290, | |
| "valid_targets_mean": 604.7, | |
| "valid_targets_min": 236 | |
| }, | |
| { | |
| "epoch": 1.9281045751633987, | |
| "grad_norm": 1.4782923070866694, | |
| "learning_rate": 3.642968396594995e-05, | |
| "loss": 0.4515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5038082003593445, | |
| "step": 295, | |
| "valid_targets_mean": 618.2, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 1.9607843137254903, | |
| "grad_norm": 1.1511433477713358, | |
| "learning_rate": 3.624148016716222e-05, | |
| "loss": 0.4676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5166301727294922, | |
| "step": 300, | |
| "valid_targets_mean": 1017.7, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 1.9934640522875817, | |
| "grad_norm": 0.7376916907598469, | |
| "learning_rate": 3.604895518042081e-05, | |
| "loss": 0.4447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2603156566619873, | |
| "step": 305, | |
| "valid_targets_mean": 1753.6, | |
| "valid_targets_min": 253 | |
| }, | |
| { | |
| "epoch": 2.026143790849673, | |
| "grad_norm": 1.1136650197465474, | |
| "learning_rate": 3.585216022868356e-05, | |
| "loss": 0.3317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3538415729999542, | |
| "step": 310, | |
| "valid_targets_mean": 857.9, | |
| "valid_targets_min": 267 | |
| }, | |
| { | |
| "epoch": 2.0588235294117645, | |
| "grad_norm": 1.1254159871502543, | |
| "learning_rate": 3.565114767096984e-05, | |
| "loss": 0.3789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3277912735939026, | |
| "step": 315, | |
| "valid_targets_mean": 981.1, | |
| "valid_targets_min": 253 | |
| }, | |
| { | |
| "epoch": 2.0915032679738563, | |
| "grad_norm": 1.3128993623554719, | |
| "learning_rate": 3.544597098843001e-05, | |
| "loss": 0.3653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37582293152809143, | |
| "step": 320, | |
| "valid_targets_mean": 853.1, | |
| "valid_targets_min": 231 | |
| }, | |
| { | |
| "epoch": 2.1241830065359477, | |
| "grad_norm": 1.192620898541865, | |
| "learning_rate": 3.5236684770116295e-05, | |
| "loss": 0.3748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4059767723083496, | |
| "step": 325, | |
| "valid_targets_mean": 1326.2, | |
| "valid_targets_min": 219 | |
| }, | |
| { | |
| "epoch": 2.156862745098039, | |
| "grad_norm": 1.0971126049747322, | |
| "learning_rate": 3.502334469845886e-05, | |
| "loss": 0.3466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33508726954460144, | |
| "step": 330, | |
| "valid_targets_mean": 1035.2, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 2.189542483660131, | |
| "grad_norm": 1.24119071299791, | |
| "learning_rate": 3.4806007534451075e-05, | |
| "loss": 0.3717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4131796360015869, | |
| "step": 335, | |
| "valid_targets_mean": 1173.8, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 0.9019703404636363, | |
| "learning_rate": 3.458473110254767e-05, | |
| "loss": 0.3992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3299303650856018, | |
| "step": 340, | |
| "valid_targets_mean": 1531.8, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 2.2549019607843137, | |
| "grad_norm": 1.3137476700390396, | |
| "learning_rate": 3.43595742752801e-05, | |
| "loss": 0.3575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33964088559150696, | |
| "step": 345, | |
| "valid_targets_mean": 799.1, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 2.287581699346405, | |
| "grad_norm": 1.3766367222049374, | |
| "learning_rate": 3.413059695759297e-05, | |
| "loss": 0.3993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41154414415359497, | |
| "step": 350, | |
| "valid_targets_mean": 812.0, | |
| "valid_targets_min": 222 | |
| }, | |
| { | |
| "epoch": 2.3202614379084965, | |
| "grad_norm": 0.694606217367308, | |
| "learning_rate": 3.389786007090581e-05, | |
| "loss": 0.3489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22677206993103027, | |
| "step": 355, | |
| "valid_targets_mean": 1935.1, | |
| "valid_targets_min": 252 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 1.6554928059464398, | |
| "learning_rate": 3.3661425536904354e-05, | |
| "loss": 0.3707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4165865480899811, | |
| "step": 360, | |
| "valid_targets_mean": 564.1, | |
| "valid_targets_min": 242 | |
| }, | |
| { | |
| "epoch": 2.3856209150326797, | |
| "grad_norm": 1.092528086923834, | |
| "learning_rate": 3.3421356261065805e-05, | |
| "loss": 0.344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4011123776435852, | |
| "step": 365, | |
| "valid_targets_mean": 1385.9, | |
| "valid_targets_min": 273 | |
| }, | |
| { | |
| "epoch": 2.418300653594771, | |
| "grad_norm": 1.0606643850646214, | |
| "learning_rate": 3.317771611592222e-05, | |
| "loss": 0.3509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3307228088378906, | |
| "step": 370, | |
| "valid_targets_mean": 1428.1, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 2.450980392156863, | |
| "grad_norm": 1.2049996354335248, | |
| "learning_rate": 3.293056992406671e-05, | |
| "loss": 0.3718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3773796856403351, | |
| "step": 375, | |
| "valid_targets_mean": 1160.4, | |
| "valid_targets_min": 216 | |
| }, | |
| { | |
| "epoch": 2.4836601307189543, | |
| "grad_norm": 1.2899198397563096, | |
| "learning_rate": 3.267998344090679e-05, | |
| "loss": 0.3411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38119545578956604, | |
| "step": 380, | |
| "valid_targets_mean": 775.1, | |
| "valid_targets_min": 240 | |
| }, | |
| { | |
| "epoch": 2.5163398692810457, | |
| "grad_norm": 1.1656133845239687, | |
| "learning_rate": 3.242602333716958e-05, | |
| "loss": 0.3492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3696956932544708, | |
| "step": 385, | |
| "valid_targets_mean": 932.7, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 2.549019607843137, | |
| "grad_norm": 1.0524939861495455, | |
| "learning_rate": 3.21687571811635e-05, | |
| "loss": 0.3939, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3500986695289612, | |
| "step": 390, | |
| "valid_targets_mean": 1204.0, | |
| "valid_targets_min": 264 | |
| }, | |
| { | |
| "epoch": 2.581699346405229, | |
| "grad_norm": 1.5158107702474672, | |
| "learning_rate": 3.190825342080109e-05, | |
| "loss": 0.3632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41287726163864136, | |
| "step": 395, | |
| "valid_targets_mean": 684.8, | |
| "valid_targets_min": 246 | |
| }, | |
| { | |
| "epoch": 2.6143790849673203, | |
| "grad_norm": 1.0708264531248797, | |
| "learning_rate": 3.164458136538789e-05, | |
| "loss": 0.3384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2635033130645752, | |
| "step": 400, | |
| "valid_targets_mean": 931.7, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 2.6470588235294117, | |
| "grad_norm": 1.1926595250829215, | |
| "learning_rate": 3.137781116718206e-05, | |
| "loss": 0.3844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3295871317386627, | |
| "step": 405, | |
| "valid_targets_mean": 954.6, | |
| "valid_targets_min": 238 | |
| }, | |
| { | |
| "epoch": 2.6797385620915035, | |
| "grad_norm": 1.4678135870422717, | |
| "learning_rate": 3.110801380272975e-05, | |
| "loss": 0.3714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40647825598716736, | |
| "step": 410, | |
| "valid_targets_mean": 701.2, | |
| "valid_targets_min": 224 | |
| }, | |
| { | |
| "epoch": 2.712418300653595, | |
| "grad_norm": 1.2361572946977641, | |
| "learning_rate": 3.0835261053981226e-05, | |
| "loss": 0.374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41568267345428467, | |
| "step": 415, | |
| "valid_targets_mean": 1122.1, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 2.7450980392156863, | |
| "grad_norm": 1.0225753867309941, | |
| "learning_rate": 3.055962548919257e-05, | |
| "loss": 0.3541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32175296545028687, | |
| "step": 420, | |
| "valid_targets_mean": 1174.4, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 0.8940629697485692, | |
| "learning_rate": 3.0281180443618337e-05, | |
| "loss": 0.3622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30009371042251587, | |
| "step": 425, | |
| "valid_targets_mean": 1711.0, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 2.810457516339869, | |
| "grad_norm": 0.8560645122217327, | |
| "learning_rate": 3.0000000000000004e-05, | |
| "loss": 0.3183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2567868232727051, | |
| "step": 430, | |
| "valid_targets_mean": 1575.6, | |
| "valid_targets_min": 181 | |
| }, | |
| { | |
| "epoch": 2.843137254901961, | |
| "grad_norm": 1.4792577436560297, | |
| "learning_rate": 2.9716158968855665e-05, | |
| "loss": 0.3729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3718124330043793, | |
| "step": 435, | |
| "valid_targets_mean": 682.1, | |
| "valid_targets_min": 256 | |
| }, | |
| { | |
| "epoch": 2.8758169934640523, | |
| "grad_norm": 0.7873403934813155, | |
| "learning_rate": 2.9429732868576e-05, | |
| "loss": 0.3203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22500234842300415, | |
| "step": 440, | |
| "valid_targets_mean": 1949.4, | |
| "valid_targets_min": 243 | |
| }, | |
| { | |
| "epoch": 2.9084967320261437, | |
| "grad_norm": 1.31414956514437, | |
| "learning_rate": 2.9140797905331964e-05, | |
| "loss": 0.3724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3907439410686493, | |
| "step": 445, | |
| "valid_targets_mean": 806.3, | |
| "valid_targets_min": 277 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 1.5773260718196254, | |
| "learning_rate": 2.884943095279946e-05, | |
| "loss": 0.3809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44405168294906616, | |
| "step": 450, | |
| "valid_targets_mean": 660.8, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 2.973856209150327, | |
| "grad_norm": 1.1417920727842683, | |
| "learning_rate": 2.8555709531706423e-05, | |
| "loss": 0.3911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2857981026172638, | |
| "step": 455, | |
| "valid_targets_mean": 842.0, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 3.0065359477124183, | |
| "grad_norm": 1.4849160766480622, | |
| "learning_rate": 2.825971178920777e-05, | |
| "loss": 0.3648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2776503562927246, | |
| "step": 460, | |
| "valid_targets_mean": 744.2, | |
| "valid_targets_min": 238 | |
| }, | |
| { | |
| "epoch": 3.0392156862745097, | |
| "grad_norm": 1.6833400262584757, | |
| "learning_rate": 2.796151647809364e-05, | |
| "loss": 0.2969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3672228455543518, | |
| "step": 465, | |
| "valid_targets_mean": 619.8, | |
| "valid_targets_min": 253 | |
| }, | |
| { | |
| "epoch": 3.0718954248366015, | |
| "grad_norm": 1.5789185393237866, | |
| "learning_rate": 2.7661202935836536e-05, | |
| "loss": 0.3121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3267885148525238, | |
| "step": 470, | |
| "valid_targets_mean": 747.2, | |
| "valid_targets_min": 253 | |
| }, | |
| { | |
| "epoch": 3.104575163398693, | |
| "grad_norm": 1.525005567408752, | |
| "learning_rate": 2.73588510634829e-05, | |
| "loss": 0.3271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3514612913131714, | |
| "step": 475, | |
| "valid_targets_mean": 866.7, | |
| "valid_targets_min": 291 | |
| }, | |
| { | |
| "epoch": 3.1372549019607843, | |
| "grad_norm": 1.2913239483066061, | |
| "learning_rate": 2.7054541304394736e-05, | |
| "loss": 0.3019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2418668270111084, | |
| "step": 480, | |
| "valid_targets_mean": 889.4, | |
| "valid_targets_min": 243 | |
| }, | |
| { | |
| "epoch": 3.1699346405228757, | |
| "grad_norm": 1.2082673756279816, | |
| "learning_rate": 2.6748354622846962e-05, | |
| "loss": 0.251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24368639290332794, | |
| "step": 485, | |
| "valid_targets_mean": 1031.2, | |
| "valid_targets_min": 289 | |
| }, | |
| { | |
| "epoch": 3.2026143790849675, | |
| "grad_norm": 1.1922254782261243, | |
| "learning_rate": 2.6440372482486127e-05, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2941184639930725, | |
| "step": 490, | |
| "valid_targets_mean": 1239.8, | |
| "valid_targets_min": 270 | |
| }, | |
| { | |
| "epoch": 3.235294117647059, | |
| "grad_norm": 1.6204743095393073, | |
| "learning_rate": 2.613067682465631e-05, | |
| "loss": 0.28, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34600523114204407, | |
| "step": 495, | |
| "valid_targets_mean": 727.4, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 3.2679738562091503, | |
| "grad_norm": 1.7067005151903063, | |
| "learning_rate": 2.5819350046597927e-05, | |
| "loss": 0.297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33082401752471924, | |
| "step": 500, | |
| "valid_targets_mean": 711.6, | |
| "valid_targets_min": 266 | |
| }, | |
| { | |
| "epoch": 3.3006535947712417, | |
| "grad_norm": 1.344918045352721, | |
| "learning_rate": 2.55064749795252e-05, | |
| "loss": 0.3077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29931801557540894, | |
| "step": 505, | |
| "valid_targets_mean": 1001.1, | |
| "valid_targets_min": 219 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 1.5947027063326324, | |
| "learning_rate": 2.519213486658819e-05, | |
| "loss": 0.2939, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3358372151851654, | |
| "step": 510, | |
| "valid_targets_mean": 640.4, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 3.366013071895425, | |
| "grad_norm": 2.1390767780943434, | |
| "learning_rate": 2.4876413340725244e-05, | |
| "loss": 0.3398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3361467719078064, | |
| "step": 515, | |
| "valid_targets_mean": 840.6, | |
| "valid_targets_min": 290 | |
| }, | |
| { | |
| "epoch": 3.3986928104575163, | |
| "grad_norm": 1.4750832729481385, | |
| "learning_rate": 2.4559394402411703e-05, | |
| "loss": 0.3096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28374865651130676, | |
| "step": 520, | |
| "valid_targets_mean": 662.6, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 3.431372549019608, | |
| "grad_norm": 1.1883189575628739, | |
| "learning_rate": 2.4241162397310836e-05, | |
| "loss": 0.2718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.288798451423645, | |
| "step": 525, | |
| "valid_targets_mean": 1315.8, | |
| "valid_targets_min": 253 | |
| }, | |
| { | |
| "epoch": 3.4640522875816995, | |
| "grad_norm": 1.4617369234648558, | |
| "learning_rate": 2.3921801993832964e-05, | |
| "loss": 0.2899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35622355341911316, | |
| "step": 530, | |
| "valid_targets_mean": 895.0, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 3.496732026143791, | |
| "grad_norm": 1.0468449297189089, | |
| "learning_rate": 2.3601398160608667e-05, | |
| "loss": 0.2905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2579011917114258, | |
| "step": 535, | |
| "valid_targets_mean": 1593.1, | |
| "valid_targets_min": 261 | |
| }, | |
| { | |
| "epoch": 3.5294117647058822, | |
| "grad_norm": 1.7782119793681848, | |
| "learning_rate": 2.3280036143882145e-05, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.340090811252594, | |
| "step": 540, | |
| "valid_targets_mean": 528.3, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 3.5620915032679736, | |
| "grad_norm": 1.1956892052285935, | |
| "learning_rate": 2.2957801444830684e-05, | |
| "loss": 0.2679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2931376099586487, | |
| "step": 545, | |
| "valid_targets_mean": 1387.6, | |
| "valid_targets_min": 280 | |
| }, | |
| { | |
| "epoch": 3.5947712418300655, | |
| "grad_norm": 1.330594535756061, | |
| "learning_rate": 2.2634779796816377e-05, | |
| "loss": 0.329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29296964406967163, | |
| "step": 550, | |
| "valid_targets_mean": 1105.3, | |
| "valid_targets_min": 279 | |
| }, | |
| { | |
| "epoch": 3.627450980392157, | |
| "grad_norm": 0.9003334164561678, | |
| "learning_rate": 2.2311057142575953e-05, | |
| "loss": 0.2683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1992267370223999, | |
| "step": 555, | |
| "valid_targets_mean": 1443.8, | |
| "valid_targets_min": 236 | |
| }, | |
| { | |
| "epoch": 3.6601307189542482, | |
| "grad_norm": 1.5529200780881063, | |
| "learning_rate": 2.198671961135498e-05, | |
| "loss": 0.2962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3122693598270416, | |
| "step": 560, | |
| "valid_targets_mean": 701.2, | |
| "valid_targets_min": 240 | |
| }, | |
| { | |
| "epoch": 3.69281045751634, | |
| "grad_norm": 1.5208864513474185, | |
| "learning_rate": 2.166185349599245e-05, | |
| "loss": 0.2646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26625901460647583, | |
| "step": 565, | |
| "valid_targets_mean": 877.5, | |
| "valid_targets_min": 260 | |
| }, | |
| { | |
| "epoch": 3.7254901960784315, | |
| "grad_norm": 1.0194886982642561, | |
| "learning_rate": 2.1336545229961772e-05, | |
| "loss": 0.3037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25733599066734314, | |
| "step": 570, | |
| "valid_targets_mean": 1400.1, | |
| "valid_targets_min": 231 | |
| }, | |
| { | |
| "epoch": 3.758169934640523, | |
| "grad_norm": 1.1707112854047086, | |
| "learning_rate": 2.1010881364374404e-05, | |
| "loss": 0.323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35556066036224365, | |
| "step": 575, | |
| "valid_targets_mean": 1315.2, | |
| "valid_targets_min": 230 | |
| }, | |
| { | |
| "epoch": 3.7908496732026142, | |
| "grad_norm": 1.0812520101554173, | |
| "learning_rate": 2.0684948544952217e-05, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21427355706691742, | |
| "step": 580, | |
| "valid_targets_mean": 978.9, | |
| "valid_targets_min": 196 | |
| }, | |
| { | |
| "epoch": 3.8235294117647056, | |
| "grad_norm": 1.7609304063968088, | |
| "learning_rate": 2.0358833488974556e-05, | |
| "loss": 0.3031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28584054112434387, | |
| "step": 585, | |
| "valid_targets_mean": 803.2, | |
| "valid_targets_min": 251 | |
| }, | |
| { | |
| "epoch": 3.8562091503267975, | |
| "grad_norm": 1.372524503176478, | |
| "learning_rate": 2.0032622962206428e-05, | |
| "loss": 0.3152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25724029541015625, | |
| "step": 590, | |
| "valid_targets_mean": 837.6, | |
| "valid_targets_min": 240 | |
| }, | |
| { | |
| "epoch": 3.888888888888889, | |
| "grad_norm": 1.551633309136953, | |
| "learning_rate": 1.9706403755813672e-05, | |
| "loss": 0.2989, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3098876476287842, | |
| "step": 595, | |
| "valid_targets_mean": 778.6, | |
| "valid_targets_min": 252 | |
| }, | |
| { | |
| "epoch": 3.9215686274509802, | |
| "grad_norm": 1.4200996111858124, | |
| "learning_rate": 1.9380262663271407e-05, | |
| "loss": 0.2982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28785258531570435, | |
| "step": 600, | |
| "valid_targets_mean": 1276.4, | |
| "valid_targets_min": 246 | |
| }, | |
| { | |
| "epoch": 3.954248366013072, | |
| "grad_norm": 1.1423875399407577, | |
| "learning_rate": 1.9054286457271892e-05, | |
| "loss": 0.2867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26374930143356323, | |
| "step": 605, | |
| "valid_targets_mean": 1187.8, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 3.9869281045751634, | |
| "grad_norm": 1.047432790499157, | |
| "learning_rate": 1.8728561866637886e-05, | |
| "loss": 0.3039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22139273583889008, | |
| "step": 610, | |
| "valid_targets_mean": 1173.8, | |
| "valid_targets_min": 241 | |
| }, | |
| { | |
| "epoch": 4.019607843137255, | |
| "grad_norm": 1.1536745154975416, | |
| "learning_rate": 1.840317555324764e-05, | |
| "loss": 0.2386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24080944061279297, | |
| "step": 615, | |
| "valid_targets_mean": 1125.4, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 4.052287581699346, | |
| "grad_norm": 2.128292235679121, | |
| "learning_rate": 1.8078214088977817e-05, | |
| "loss": 0.2196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2866114377975464, | |
| "step": 620, | |
| "valid_targets_mean": 497.8, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 4.084967320261438, | |
| "grad_norm": 1.0323930725322927, | |
| "learning_rate": 1.7753763932670257e-05, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18582549691200256, | |
| "step": 625, | |
| "valid_targets_mean": 1817.0, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 4.117647058823529, | |
| "grad_norm": 1.296142459740979, | |
| "learning_rate": 1.742991140712881e-05, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1894800066947937, | |
| "step": 630, | |
| "valid_targets_mean": 1049.4, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 4.150326797385621, | |
| "grad_norm": 1.710406042987915, | |
| "learning_rate": 1.7106742676152454e-05, | |
| "loss": 0.2344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23869173228740692, | |
| "step": 635, | |
| "valid_targets_mean": 681.4, | |
| "valid_targets_min": 236 | |
| }, | |
| { | |
| "epoch": 4.183006535947713, | |
| "grad_norm": 1.4576865063285587, | |
| "learning_rate": 1.678434372161064e-05, | |
| "loss": 0.2277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18657907843589783, | |
| "step": 640, | |
| "valid_targets_mean": 978.1, | |
| "valid_targets_min": 219 | |
| }, | |
| { | |
| "epoch": 4.215686274509804, | |
| "grad_norm": 1.0562589689629556, | |
| "learning_rate": 1.646280032056704e-05, | |
| "loss": 0.2179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1481604427099228, | |
| "step": 645, | |
| "valid_targets_mean": 1139.1, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 4.248366013071895, | |
| "grad_norm": 1.4916205632940045, | |
| "learning_rate": 1.6142198022457853e-05, | |
| "loss": 0.233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24131783843040466, | |
| "step": 650, | |
| "valid_targets_mean": 1026.0, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 4.281045751633987, | |
| "grad_norm": 1.659388286048901, | |
| "learning_rate": 1.5822622126330597e-05, | |
| "loss": 0.2493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2790229022502899, | |
| "step": 655, | |
| "valid_targets_mean": 866.1, | |
| "valid_targets_min": 289 | |
| }, | |
| { | |
| "epoch": 4.313725490196078, | |
| "grad_norm": 1.73499163192446, | |
| "learning_rate": 1.550415765814955e-05, | |
| "loss": 0.2102, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19306321442127228, | |
| "step": 660, | |
| "valid_targets_mean": 854.9, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 4.34640522875817, | |
| "grad_norm": 1.7154850302464963, | |
| "learning_rate": 1.5186889348173857e-05, | |
| "loss": 0.2354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25681236386299133, | |
| "step": 665, | |
| "valid_targets_mean": 689.3, | |
| "valid_targets_min": 217 | |
| }, | |
| { | |
| "epoch": 4.379084967320262, | |
| "grad_norm": 1.488660446451009, | |
| "learning_rate": 1.487090160841433e-05, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2581811547279358, | |
| "step": 670, | |
| "valid_targets_mean": 959.4, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 4.411764705882353, | |
| "grad_norm": 1.1881498761592073, | |
| "learning_rate": 1.4556278510174827e-05, | |
| "loss": 0.2262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19084610044956207, | |
| "step": 675, | |
| "valid_targets_mean": 1209.3, | |
| "valid_targets_min": 226 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 1.2552326944423986, | |
| "learning_rate": 1.424310376168441e-05, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1857946366071701, | |
| "step": 680, | |
| "valid_targets_mean": 1078.2, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 4.477124183006536, | |
| "grad_norm": 1.4969283216444473, | |
| "learning_rate": 1.3931460685826022e-05, | |
| "loss": 0.2424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22742964327335358, | |
| "step": 685, | |
| "valid_targets_mean": 1085.7, | |
| "valid_targets_min": 219 | |
| }, | |
| { | |
| "epoch": 4.509803921568627, | |
| "grad_norm": 1.154906103363336, | |
| "learning_rate": 1.3621432197967664e-05, | |
| "loss": 0.2228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1935591995716095, | |
| "step": 690, | |
| "valid_targets_mean": 1364.5, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 4.542483660130719, | |
| "grad_norm": 1.81580756139067, | |
| "learning_rate": 1.3313100783902097e-05, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2677111327648163, | |
| "step": 695, | |
| "valid_targets_mean": 630.1, | |
| "valid_targets_min": 243 | |
| }, | |
| { | |
| "epoch": 4.57516339869281, | |
| "grad_norm": 1.4394912824181447, | |
| "learning_rate": 1.3006548477900735e-05, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2861871123313904, | |
| "step": 700, | |
| "valid_targets_mean": 1012.4, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 4.607843137254902, | |
| "grad_norm": 1.7782439408173458, | |
| "learning_rate": 1.270185684088771e-05, | |
| "loss": 0.221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2639431953430176, | |
| "step": 705, | |
| "valid_targets_mean": 893.2, | |
| "valid_targets_min": 218 | |
| }, | |
| { | |
| "epoch": 4.640522875816993, | |
| "grad_norm": 1.3770119209689564, | |
| "learning_rate": 1.2399106938739903e-05, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22172415256500244, | |
| "step": 710, | |
| "valid_targets_mean": 898.8, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 4.673202614379085, | |
| "grad_norm": 1.5667989718671909, | |
| "learning_rate": 1.2098379320718633e-05, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25639814138412476, | |
| "step": 715, | |
| "valid_targets_mean": 774.8, | |
| "valid_targets_min": 266 | |
| }, | |
| { | |
| "epoch": 4.705882352941177, | |
| "grad_norm": 1.6863304630924782, | |
| "learning_rate": 1.179975399803881e-05, | |
| "loss": 0.2324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2574901878833771, | |
| "step": 720, | |
| "valid_targets_mean": 951.9, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 4.738562091503268, | |
| "grad_norm": 1.835300847793114, | |
| "learning_rate": 1.1503310422581286e-05, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23203280568122864, | |
| "step": 725, | |
| "valid_targets_mean": 602.1, | |
| "valid_targets_min": 270 | |
| }, | |
| { | |
| "epoch": 4.771241830065359, | |
| "grad_norm": 1.4010835866515463, | |
| "learning_rate": 1.1209127465753978e-05, | |
| "loss": 0.2208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2546510100364685, | |
| "step": 730, | |
| "valid_targets_mean": 1022.6, | |
| "valid_targets_min": 225 | |
| }, | |
| { | |
| "epoch": 4.803921568627451, | |
| "grad_norm": 1.6323587959134847, | |
| "learning_rate": 1.0917283397507392e-05, | |
| "loss": 0.2089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2114773392677307, | |
| "step": 735, | |
| "valid_targets_mean": 891.1, | |
| "valid_targets_min": 246 | |
| }, | |
| { | |
| "epoch": 4.836601307189542, | |
| "grad_norm": 1.8162227837000073, | |
| "learning_rate": 1.0627855865510294e-05, | |
| "loss": 0.2332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.259385347366333, | |
| "step": 740, | |
| "valid_targets_mean": 751.1, | |
| "valid_targets_min": 295 | |
| }, | |
| { | |
| "epoch": 4.8692810457516345, | |
| "grad_norm": 1.5458544536658743, | |
| "learning_rate": 1.034092187449082e-05, | |
| "loss": 0.2475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1714608073234558, | |
| "step": 745, | |
| "valid_targets_mean": 771.1, | |
| "valid_targets_min": 246 | |
| }, | |
| { | |
| "epoch": 4.901960784313726, | |
| "grad_norm": 1.3785541410906907, | |
| "learning_rate": 1.0056557765748684e-05, | |
| "loss": 0.2137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24545931816101074, | |
| "step": 750, | |
| "valid_targets_mean": 995.0, | |
| "valid_targets_min": 267 | |
| }, | |
| { | |
| "epoch": 4.934640522875817, | |
| "grad_norm": 1.3403750984818765, | |
| "learning_rate": 9.774839196843953e-06, | |
| "loss": 0.2251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25746211409568787, | |
| "step": 755, | |
| "valid_targets_mean": 1251.8, | |
| "valid_targets_min": 222 | |
| }, | |
| { | |
| "epoch": 4.967320261437909, | |
| "grad_norm": 1.619409098874215, | |
| "learning_rate": 9.49584112146765e-06, | |
| "loss": 0.2221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2491072118282318, | |
| "step": 760, | |
| "valid_targets_mean": 717.7, | |
| "valid_targets_min": 219 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.532384895615285, | |
| "learning_rate": 9.21963776949969e-06, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2333248406648636, | |
| "step": 765, | |
| "valid_targets_mean": 766.5, | |
| "valid_targets_min": 240 | |
| }, | |
| { | |
| "epoch": 5.032679738562091, | |
| "grad_norm": 1.7276785744874288, | |
| "learning_rate": 8.946302627259363e-06, | |
| "loss": 0.1998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20002366602420807, | |
| "step": 770, | |
| "valid_targets_mean": 606.7, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 5.065359477124183, | |
| "grad_norm": 1.3996086672565842, | |
| "learning_rate": 8.67590841795366e-06, | |
| "loss": 0.1893, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16175302863121033, | |
| "step": 775, | |
| "valid_targets_mean": 969.9, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 5.098039215686274, | |
| "grad_norm": 1.3153977909254402, | |
| "learning_rate": 8.408527082328605e-06, | |
| "loss": 0.1854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1600106954574585, | |
| "step": 780, | |
| "valid_targets_mean": 1062.1, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 5.130718954248366, | |
| "grad_norm": 1.5111274969532478, | |
| "learning_rate": 8.144229759528835e-06, | |
| "loss": 0.1881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15232618153095245, | |
| "step": 785, | |
| "valid_targets_mean": 868.0, | |
| "valid_targets_min": 243 | |
| }, | |
| { | |
| "epoch": 5.163398692810458, | |
| "grad_norm": 1.30810433786, | |
| "learning_rate": 7.883086768170369e-06, | |
| "loss": 0.1847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15596047043800354, | |
| "step": 790, | |
| "valid_targets_mean": 1220.3, | |
| "valid_targets_min": 245 | |
| }, | |
| { | |
| "epoch": 5.196078431372549, | |
| "grad_norm": 1.2979766762023117, | |
| "learning_rate": 7.625167587631732e-06, | |
| "loss": 0.2001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1786264181137085, | |
| "step": 795, | |
| "valid_targets_mean": 1454.9, | |
| "valid_targets_min": 266 | |
| }, | |
| { | |
| "epoch": 5.228758169934641, | |
| "grad_norm": 1.5405276569855957, | |
| "learning_rate": 7.370540839568372e-06, | |
| "loss": 0.1901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15867263078689575, | |
| "step": 800, | |
| "valid_targets_mean": 666.9, | |
| "valid_targets_min": 240 | |
| }, | |
| { | |
| "epoch": 5.261437908496732, | |
| "grad_norm": 1.5240591186359513, | |
| "learning_rate": 7.119274269655265e-06, | |
| "loss": 0.2073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2132365107536316, | |
| "step": 805, | |
| "valid_targets_mean": 1250.9, | |
| "valid_targets_min": 264 | |
| }, | |
| { | |
| "epoch": 5.294117647058823, | |
| "grad_norm": 1.703110559867695, | |
| "learning_rate": 6.87143472956256e-06, | |
| "loss": 0.1913, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1889723688364029, | |
| "step": 810, | |
| "valid_targets_mean": 872.8, | |
| "valid_targets_min": 253 | |
| }, | |
| { | |
| "epoch": 5.326797385620915, | |
| "grad_norm": 1.3548680024706885, | |
| "learning_rate": 6.627088159169146e-06, | |
| "loss": 0.1651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16059917211532593, | |
| "step": 815, | |
| "valid_targets_mean": 1151.3, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 5.359477124183006, | |
| "grad_norm": 1.766434503841294, | |
| "learning_rate": 6.3862995690187505e-06, | |
| "loss": 0.1777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1978355199098587, | |
| "step": 820, | |
| "valid_targets_mean": 688.6, | |
| "valid_targets_min": 242 | |
| }, | |
| { | |
| "epoch": 5.392156862745098, | |
| "grad_norm": 1.681475454731881, | |
| "learning_rate": 6.1491330230232944e-06, | |
| "loss": 0.2005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21753373742103577, | |
| "step": 825, | |
| "valid_targets_mean": 723.4, | |
| "valid_targets_min": 230 | |
| }, | |
| { | |
| "epoch": 5.42483660130719, | |
| "grad_norm": 1.436963945472817, | |
| "learning_rate": 5.915651621418172e-06, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16063036024570465, | |
| "step": 830, | |
| "valid_targets_mean": 1000.5, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 5.457516339869281, | |
| "grad_norm": 1.6529262568452745, | |
| "learning_rate": 5.6859174839738576e-06, | |
| "loss": 0.2127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22638444602489471, | |
| "step": 835, | |
| "valid_targets_mean": 919.0, | |
| "valid_targets_min": 226 | |
| }, | |
| { | |
| "epoch": 5.490196078431373, | |
| "grad_norm": 1.3361133443873117, | |
| "learning_rate": 5.459991733468375e-06, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1512422263622284, | |
| "step": 840, | |
| "valid_targets_mean": 1104.3, | |
| "valid_targets_min": 289 | |
| }, | |
| { | |
| "epoch": 5.522875816993464, | |
| "grad_norm": 1.4473821491885344, | |
| "learning_rate": 5.237934479425091e-06, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1956217885017395, | |
| "step": 845, | |
| "valid_targets_mean": 1222.6, | |
| "valid_targets_min": 270 | |
| }, | |
| { | |
| "epoch": 5.555555555555555, | |
| "grad_norm": 2.161746051226456, | |
| "learning_rate": 5.019804802120027e-06, | |
| "loss": 0.2004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2561233341693878, | |
| "step": 850, | |
| "valid_targets_mean": 559.7, | |
| "valid_targets_min": 217 | |
| }, | |
| { | |
| "epoch": 5.588235294117647, | |
| "grad_norm": 1.7693395053973981, | |
| "learning_rate": 4.805660736863023e-06, | |
| "loss": 0.1831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19513806700706482, | |
| "step": 855, | |
| "valid_targets_mean": 617.1, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 5.620915032679738, | |
| "grad_norm": 1.3552287003331067, | |
| "learning_rate": 4.595559258556963e-06, | |
| "loss": 0.188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1493796408176422, | |
| "step": 860, | |
| "valid_targets_mean": 1039.1, | |
| "valid_targets_min": 222 | |
| }, | |
| { | |
| "epoch": 5.65359477124183, | |
| "grad_norm": 1.3819829355501174, | |
| "learning_rate": 4.389556266539081e-06, | |
| "loss": 0.2014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17469432950019836, | |
| "step": 865, | |
| "valid_targets_mean": 1213.0, | |
| "valid_targets_min": 290 | |
| }, | |
| { | |
| "epoch": 5.686274509803922, | |
| "grad_norm": 1.1282318414623302, | |
| "learning_rate": 4.187706569708472e-06, | |
| "loss": 0.2089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17088523507118225, | |
| "step": 870, | |
| "valid_targets_mean": 1821.5, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 5.718954248366013, | |
| "grad_norm": 1.5706108442305917, | |
| "learning_rate": 3.990063871943681e-06, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20732146501541138, | |
| "step": 875, | |
| "valid_targets_mean": 747.2, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 5.751633986928105, | |
| "grad_norm": 1.2522615185034307, | |
| "learning_rate": 3.796680757814344e-06, | |
| "loss": 0.2047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16442234814167023, | |
| "step": 880, | |
| "valid_targets_mean": 1394.4, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 5.784313725490196, | |
| "grad_norm": 1.5816802112242634, | |
| "learning_rate": 3.6076086785905708e-06, | |
| "loss": 0.1978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20470181107521057, | |
| "step": 885, | |
| "valid_targets_mean": 890.7, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 5.816993464052287, | |
| "grad_norm": 1.7816275222819353, | |
| "learning_rate": 3.4228979385539153e-06, | |
| "loss": 0.1838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2052854597568512, | |
| "step": 890, | |
| "valid_targets_mean": 843.9, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 5.849673202614379, | |
| "grad_norm": 2.7820140943526086, | |
| "learning_rate": 3.242597681613471e-06, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3125446140766144, | |
| "step": 895, | |
| "valid_targets_mean": 460.5, | |
| "valid_targets_min": 261 | |
| }, | |
| { | |
| "epoch": 5.882352941176471, | |
| "grad_norm": 1.089471821874377, | |
| "learning_rate": 3.0667558782306782e-06, | |
| "loss": 0.1695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12326781451702118, | |
| "step": 900, | |
| "valid_targets_mean": 1121.2, | |
| "valid_targets_min": 219 | |
| }, | |
| { | |
| "epoch": 5.915032679738562, | |
| "grad_norm": 1.7665649158055792, | |
| "learning_rate": 2.895419312656409e-06, | |
| "loss": 0.1708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19156697392463684, | |
| "step": 905, | |
| "valid_targets_mean": 633.0, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 5.947712418300654, | |
| "grad_norm": 1.6688900557662307, | |
| "learning_rate": 2.7286335704835788e-06, | |
| "loss": 0.1935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18692585825920105, | |
| "step": 910, | |
| "valid_targets_mean": 1061.8, | |
| "valid_targets_min": 243 | |
| }, | |
| { | |
| "epoch": 5.980392156862745, | |
| "grad_norm": 1.7410431833200393, | |
| "learning_rate": 2.566443026518692e-06, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2652069926261902, | |
| "step": 915, | |
| "valid_targets_mean": 920.4, | |
| "valid_targets_min": 247 | |
| }, | |
| { | |
| "epoch": 6.0130718954248366, | |
| "grad_norm": 1.5421028631124705, | |
| "learning_rate": 2.4088908329755678e-06, | |
| "loss": 0.187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17312213778495789, | |
| "step": 920, | |
| "valid_targets_mean": 767.6, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 6.045751633986928, | |
| "grad_norm": 1.5991633631226976, | |
| "learning_rate": 2.256018907994284e-06, | |
| "loss": 0.139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17524030804634094, | |
| "step": 925, | |
| "valid_targets_mean": 662.6, | |
| "valid_targets_min": 245 | |
| }, | |
| { | |
| "epoch": 6.078431372549019, | |
| "grad_norm": 1.3250031504324633, | |
| "learning_rate": 2.107867924488509e-06, | |
| "loss": 0.1548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12431657314300537, | |
| "step": 930, | |
| "valid_targets_mean": 958.3, | |
| "valid_targets_min": 218 | |
| }, | |
| { | |
| "epoch": 6.111111111111111, | |
| "grad_norm": 1.2101774128898037, | |
| "learning_rate": 1.9644772993241166e-06, | |
| "loss": 0.1588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1561109721660614, | |
| "step": 935, | |
| "valid_targets_mean": 1136.2, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 6.143790849673203, | |
| "grad_norm": 1.925356995762324, | |
| "learning_rate": 1.8258851828319678e-06, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17998185753822327, | |
| "step": 940, | |
| "valid_targets_mean": 535.2, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 6.176470588235294, | |
| "grad_norm": 1.737151480718245, | |
| "learning_rate": 1.692128448657695e-06, | |
| "loss": 0.1674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18432410061359406, | |
| "step": 945, | |
| "valid_targets_mean": 649.9, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 6.209150326797386, | |
| "grad_norm": 1.4475105159337607, | |
| "learning_rate": 1.5632426839511494e-06, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1958422064781189, | |
| "step": 950, | |
| "valid_targets_mean": 1076.3, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 6.241830065359477, | |
| "grad_norm": 1.846609568443287, | |
| "learning_rate": 1.4392621798981154e-06, | |
| "loss": 0.1831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20268788933753967, | |
| "step": 955, | |
| "valid_targets_mean": 756.5, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 6.2745098039215685, | |
| "grad_norm": 2.1487512775179427, | |
| "learning_rate": 1.3202199225968481e-06, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21326568722724915, | |
| "step": 960, | |
| "valid_targets_mean": 772.2, | |
| "valid_targets_min": 277 | |
| }, | |
| { | |
| "epoch": 6.30718954248366, | |
| "grad_norm": 1.9580761806861307, | |
| "learning_rate": 1.2061475842818337e-06, | |
| "loss": 0.178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19729763269424438, | |
| "step": 965, | |
| "valid_targets_mean": 605.1, | |
| "valid_targets_min": 246 | |
| }, | |
| { | |
| "epoch": 6.339869281045751, | |
| "grad_norm": 2.549605085746636, | |
| "learning_rate": 1.0970755148971057e-06, | |
| "loss": 0.1761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27497926354408264, | |
| "step": 970, | |
| "valid_targets_mean": 525.9, | |
| "valid_targets_min": 226 | |
| }, | |
| { | |
| "epoch": 6.372549019607844, | |
| "grad_norm": 1.9954702320448223, | |
| "learning_rate": 9.930327340213908e-07, | |
| "loss": 0.1725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24627891182899475, | |
| "step": 975, | |
| "valid_targets_mean": 668.2, | |
| "valid_targets_min": 219 | |
| }, | |
| { | |
| "epoch": 6.405228758169935, | |
| "grad_norm": 1.4583942689664242, | |
| "learning_rate": 8.940469231471893e-07, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15345948934555054, | |
| "step": 980, | |
| "valid_targets_mean": 996.8, | |
| "valid_targets_min": 231 | |
| }, | |
| { | |
| "epoch": 6.437908496732026, | |
| "grad_norm": 1.2627907187503407, | |
| "learning_rate": 8.001444183158602e-07, | |
| "loss": 0.1796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19024597108364105, | |
| "step": 985, | |
| "valid_targets_mean": 1571.8, | |
| "valid_targets_min": 258 | |
| }, | |
| { | |
| "epoch": 6.470588235294118, | |
| "grad_norm": 1.5109741084844859, | |
| "learning_rate": 7.1135020311071e-07, | |
| "loss": 0.1758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15086877346038818, | |
| "step": 990, | |
| "valid_targets_mean": 737.7, | |
| "valid_targets_min": 226 | |
| }, | |
| { | |
| "epoch": 6.503267973856209, | |
| "grad_norm": 1.6130378471299882, | |
| "learning_rate": 6.276879020098769e-07, | |
| "loss": 0.1753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19527272880077362, | |
| "step": 995, | |
| "valid_targets_mean": 776.8, | |
| "valid_targets_min": 276 | |
| }, | |
| { | |
| "epoch": 6.5359477124183005, | |
| "grad_norm": 1.0872457293239366, | |
| "learning_rate": 5.491797741008232e-07, | |
| "loss": 0.1872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14185872673988342, | |
| "step": 1000, | |
| "valid_targets_mean": 1552.2, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 6.568627450980392, | |
| "grad_norm": 1.5205248767147233, | |
| "learning_rate": 4.758467071581363e-07, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15201915800571442, | |
| "step": 1005, | |
| "valid_targets_mean": 837.2, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 6.601307189542483, | |
| "grad_norm": 1.5480417864738425, | |
| "learning_rate": 4.077082120861309e-07, | |
| "loss": 0.1659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18016156554222107, | |
| "step": 1010, | |
| "valid_targets_mean": 948.2, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 6.633986928104575, | |
| "grad_norm": 1.4118892710756072, | |
| "learning_rate": 3.4478241772780695e-07, | |
| "loss": 0.1596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15900777280330658, | |
| "step": 1015, | |
| "valid_targets_mean": 1190.9, | |
| "valid_targets_min": 283 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 1.7202119833416936, | |
| "learning_rate": 2.8708606604151757e-07, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19836293160915375, | |
| "step": 1020, | |
| "valid_targets_mean": 671.6, | |
| "valid_targets_min": 242 | |
| }, | |
| { | |
| "epoch": 6.699346405228758, | |
| "grad_norm": 1.6749245084847142, | |
| "learning_rate": 2.346345076466272e-07, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17178763449192047, | |
| "step": 1025, | |
| "valid_targets_mean": 923.9, | |
| "valid_targets_min": 196 | |
| }, | |
| { | |
| "epoch": 6.73202614379085, | |
| "grad_norm": 1.3050532980878529, | |
| "learning_rate": 1.8744169773932784e-07, | |
| "loss": 0.1579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1522129327058792, | |
| "step": 1030, | |
| "valid_targets_mean": 1047.8, | |
| "valid_targets_min": 273 | |
| }, | |
| { | |
| "epoch": 6.764705882352941, | |
| "grad_norm": 1.7719572564362351, | |
| "learning_rate": 1.4552019237976e-07, | |
| "loss": 0.1979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22099488973617554, | |
| "step": 1035, | |
| "valid_targets_mean": 829.3, | |
| "valid_targets_min": 288 | |
| }, | |
| { | |
| "epoch": 6.7973856209150325, | |
| "grad_norm": 1.4013677157515687, | |
| "learning_rate": 1.0888114515134274e-07, | |
| "loss": 0.1934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17296719551086426, | |
| "step": 1040, | |
| "valid_targets_mean": 1032.6, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 6.830065359477124, | |
| "grad_norm": 1.4687939649728274, | |
| "learning_rate": 7.753430419328301e-08, | |
| "loss": 0.1638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16742470860481262, | |
| "step": 1045, | |
| "valid_targets_mean": 1204.0, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 6.862745098039216, | |
| "grad_norm": 1.3669411898643498, | |
| "learning_rate": 5.1488009606979195e-08, | |
| "loss": 0.1568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13698092103004456, | |
| "step": 1050, | |
| "valid_targets_mean": 980.1, | |
| "valid_targets_min": 276 | |
| }, | |
| { | |
| "epoch": 6.895424836601308, | |
| "grad_norm": 1.119666687197484, | |
| "learning_rate": 3.074919123708275e-08, | |
| "loss": 0.191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13298380374908447, | |
| "step": 1055, | |
| "valid_targets_mean": 1280.6, | |
| "valid_targets_min": 238 | |
| }, | |
| { | |
| "epoch": 6.928104575163399, | |
| "grad_norm": 1.5063802393305312, | |
| "learning_rate": 1.5323366827737496e-08, | |
| "loss": 0.193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17369940876960754, | |
| "step": 1060, | |
| "valid_targets_mean": 982.8, | |
| "valid_targets_min": 219 | |
| }, | |
| { | |
| "epoch": 6.96078431372549, | |
| "grad_norm": 1.614719027777907, | |
| "learning_rate": 5.2146405545427935e-09, | |
| "loss": 0.1605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1444833129644394, | |
| "step": 1065, | |
| "valid_targets_mean": 855.0, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 6.993464052287582, | |
| "grad_norm": 1.435878529641309, | |
| "learning_rate": 4.2570193260482727e-10, | |
| "loss": 0.1657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.153692826628685, | |
| "step": 1070, | |
| "valid_targets_mean": 1271.9, | |
| "valid_targets_min": 230 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21700537204742432, | |
| "step": 1071, | |
| "total_flos": 87884495978496.0, | |
| "train_loss": 0.3235391679834696, | |
| "train_runtime": 3658.4254, | |
| "train_samples_per_second": 4.669, | |
| "train_steps_per_second": 0.293, | |
| "valid_targets_mean": 902.0, | |
| "valid_targets_min": 329 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1071, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 87884495978496.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |