penfever's picture
End of training
b91a79d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1071,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.032679738562091505,
"grad_norm": 11.060789182905086,
"learning_rate": 1.4814814814814815e-06,
"loss": 0.8827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.8205808401107788,
"step": 5,
"valid_targets_mean": 779.9,
"valid_targets_min": 250
},
{
"epoch": 0.06535947712418301,
"grad_norm": 6.973307415232975,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.8066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.8751524686813354,
"step": 10,
"valid_targets_mean": 1240.4,
"valid_targets_min": 217
},
{
"epoch": 0.09803921568627451,
"grad_norm": 4.5603144559339395,
"learning_rate": 5.185185185185185e-06,
"loss": 0.7469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7398011684417725,
"step": 15,
"valid_targets_mean": 669.1,
"valid_targets_min": 285
},
{
"epoch": 0.13071895424836602,
"grad_norm": 3.5517010420743618,
"learning_rate": 7.0370370370370375e-06,
"loss": 0.6986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.8484277129173279,
"step": 20,
"valid_targets_mean": 701.3,
"valid_targets_min": 235
},
{
"epoch": 0.16339869281045752,
"grad_norm": 1.71312096923623,
"learning_rate": 8.888888888888888e-06,
"loss": 0.7292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7061011791229248,
"step": 25,
"valid_targets_mean": 1292.7,
"valid_targets_min": 226
},
{
"epoch": 0.19607843137254902,
"grad_norm": 1.1836260079640273,
"learning_rate": 1.0740740740740742e-05,
"loss": 0.6299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5523278713226318,
"step": 30,
"valid_targets_mean": 1475.2,
"valid_targets_min": 285
},
{
"epoch": 0.22875816993464052,
"grad_norm": 0.7908890063519861,
"learning_rate": 1.2592592592592593e-05,
"loss": 0.4857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3864622116088867,
"step": 35,
"valid_targets_mean": 2134.2,
"valid_targets_min": 335
},
{
"epoch": 0.26143790849673204,
"grad_norm": 1.3350691541627218,
"learning_rate": 1.4444444444444446e-05,
"loss": 0.5807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4980165958404541,
"step": 40,
"valid_targets_mean": 929.5,
"valid_targets_min": 296
},
{
"epoch": 0.29411764705882354,
"grad_norm": 1.3513898058061415,
"learning_rate": 1.6296296296296297e-05,
"loss": 0.6403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6828616857528687,
"step": 45,
"valid_targets_mean": 1063.6,
"valid_targets_min": 278
},
{
"epoch": 0.32679738562091504,
"grad_norm": 1.0371862834617929,
"learning_rate": 1.814814814814815e-05,
"loss": 0.5481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4910392761230469,
"step": 50,
"valid_targets_mean": 1120.2,
"valid_targets_min": 268
},
{
"epoch": 0.35947712418300654,
"grad_norm": 1.280369114672039,
"learning_rate": 2e-05,
"loss": 0.475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5395255088806152,
"step": 55,
"valid_targets_mean": 998.9,
"valid_targets_min": 242
},
{
"epoch": 0.39215686274509803,
"grad_norm": 1.117311206993178,
"learning_rate": 2.1851851851851852e-05,
"loss": 0.5648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5647487640380859,
"step": 60,
"valid_targets_mean": 1208.2,
"valid_targets_min": 271
},
{
"epoch": 0.42483660130718953,
"grad_norm": 1.1653791876480772,
"learning_rate": 2.3703703703703703e-05,
"loss": 0.5476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5218506455421448,
"step": 65,
"valid_targets_mean": 1001.4,
"valid_targets_min": 280
},
{
"epoch": 0.45751633986928103,
"grad_norm": 1.174967214115349,
"learning_rate": 2.5555555555555554e-05,
"loss": 0.5201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47852823138237,
"step": 70,
"valid_targets_mean": 1043.4,
"valid_targets_min": 243
},
{
"epoch": 0.49019607843137253,
"grad_norm": 1.6604928495972278,
"learning_rate": 2.740740740740741e-05,
"loss": 0.5551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5851165056228638,
"step": 75,
"valid_targets_mean": 576.4,
"valid_targets_min": 287
},
{
"epoch": 0.5228758169934641,
"grad_norm": 1.0202504936662373,
"learning_rate": 2.9259259259259262e-05,
"loss": 0.5125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3803825378417969,
"step": 80,
"valid_targets_mean": 1004.8,
"valid_targets_min": 274
},
{
"epoch": 0.5555555555555556,
"grad_norm": 1.1312120882470664,
"learning_rate": 3.111111111111112e-05,
"loss": 0.5344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5755863189697266,
"step": 85,
"valid_targets_mean": 1221.0,
"valid_targets_min": 243
},
{
"epoch": 0.5882352941176471,
"grad_norm": 1.4325847007248265,
"learning_rate": 3.2962962962962964e-05,
"loss": 0.5168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6161978244781494,
"step": 90,
"valid_targets_mean": 746.8,
"valid_targets_min": 322
},
{
"epoch": 0.6209150326797386,
"grad_norm": 0.9183077187637829,
"learning_rate": 3.481481481481482e-05,
"loss": 0.5328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.45081230998039246,
"step": 95,
"valid_targets_mean": 1345.3,
"valid_targets_min": 278
},
{
"epoch": 0.6535947712418301,
"grad_norm": 1.6429963233048106,
"learning_rate": 3.6666666666666666e-05,
"loss": 0.582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6670611500740051,
"step": 100,
"valid_targets_mean": 619.7,
"valid_targets_min": 270
},
{
"epoch": 0.6862745098039216,
"grad_norm": 1.1663889041506792,
"learning_rate": 3.851851851851852e-05,
"loss": 0.4839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4529034495353699,
"step": 105,
"valid_targets_mean": 863.6,
"valid_targets_min": 236
},
{
"epoch": 0.7189542483660131,
"grad_norm": 1.2334368369716797,
"learning_rate": 3.9999893574233685e-05,
"loss": 0.5261,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4745844304561615,
"step": 110,
"valid_targets_mean": 830.2,
"valid_targets_min": 258
},
{
"epoch": 0.7516339869281046,
"grad_norm": 1.249990193797269,
"learning_rate": 3.9996168791339075e-05,
"loss": 0.5166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4642437696456909,
"step": 115,
"valid_targets_mean": 989.8,
"valid_targets_min": 260
},
{
"epoch": 0.7843137254901961,
"grad_norm": 1.2281972228406923,
"learning_rate": 3.998712385271904e-05,
"loss": 0.5019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.48900026082992554,
"step": 120,
"valid_targets_mean": 832.8,
"valid_targets_min": 255
},
{
"epoch": 0.8169934640522876,
"grad_norm": 1.3239536346835794,
"learning_rate": 3.997276116485867e-05,
"loss": 0.5344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6641391515731812,
"step": 125,
"valid_targets_mean": 930.1,
"valid_targets_min": 308
},
{
"epoch": 0.8496732026143791,
"grad_norm": 1.1579149024229984,
"learning_rate": 3.995308454907679e-05,
"loss": 0.542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5919806957244873,
"step": 130,
"valid_targets_mean": 1064.2,
"valid_targets_min": 274
},
{
"epoch": 0.8823529411764706,
"grad_norm": 1.0457817537071603,
"learning_rate": 3.992809924050924e-05,
"loss": 0.4896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5484431385993958,
"step": 135,
"valid_targets_mean": 1360.4,
"valid_targets_min": 228
},
{
"epoch": 0.9150326797385621,
"grad_norm": 1.2511239930549563,
"learning_rate": 3.9897811886716054e-05,
"loss": 0.5363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.610672652721405,
"step": 140,
"valid_targets_mean": 1068.2,
"valid_targets_min": 252
},
{
"epoch": 0.9477124183006536,
"grad_norm": 1.2337054029635952,
"learning_rate": 3.986223054591281e-05,
"loss": 0.5173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4866800606250763,
"step": 145,
"valid_targets_mean": 781.0,
"valid_targets_min": 246
},
{
"epoch": 0.9803921568627451,
"grad_norm": 0.8076936922856879,
"learning_rate": 3.982136468482665e-05,
"loss": 0.4419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35834211111068726,
"step": 150,
"valid_targets_mean": 1508.8,
"valid_targets_min": 264
},
{
"epoch": 1.0130718954248366,
"grad_norm": 0.7776333910648919,
"learning_rate": 3.9775225176177595e-05,
"loss": 0.3684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27405640482902527,
"step": 155,
"valid_targets_mean": 1327.5,
"valid_targets_min": 235
},
{
"epoch": 1.0457516339869282,
"grad_norm": 1.1734500557798586,
"learning_rate": 3.972382429578577e-05,
"loss": 0.442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37759023904800415,
"step": 160,
"valid_targets_mean": 763.9,
"valid_targets_min": 244
},
{
"epoch": 1.0784313725490196,
"grad_norm": 0.9239963152479674,
"learning_rate": 3.966717571930529e-05,
"loss": 0.4546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3895590901374817,
"step": 165,
"valid_targets_mean": 1424.2,
"valid_targets_min": 258
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.7807825561166607,
"learning_rate": 3.960529451858575e-05,
"loss": 0.4344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3694984018802643,
"step": 170,
"valid_targets_mean": 1572.8,
"valid_targets_min": 327
},
{
"epoch": 1.1437908496732025,
"grad_norm": 0.7003455825119332,
"learning_rate": 3.9538197157662226e-05,
"loss": 0.4177,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2527535557746887,
"step": 175,
"valid_targets_mean": 1521.2,
"valid_targets_min": 217
},
{
"epoch": 1.1764705882352942,
"grad_norm": 1.7156135500256005,
"learning_rate": 3.946590148837487e-05,
"loss": 0.4335,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47262683510780334,
"step": 180,
"valid_targets_mean": 566.9,
"valid_targets_min": 271
},
{
"epoch": 1.2091503267973855,
"grad_norm": 1.008970459747819,
"learning_rate": 3.9388426745619266e-05,
"loss": 0.4002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3759155869483948,
"step": 185,
"valid_targets_mean": 1439.6,
"valid_targets_min": 273
},
{
"epoch": 1.2418300653594772,
"grad_norm": 1.327191452209238,
"learning_rate": 3.930579354222883e-05,
"loss": 0.4606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.463574081659317,
"step": 190,
"valid_targets_mean": 751.6,
"valid_targets_min": 248
},
{
"epoch": 1.2745098039215685,
"grad_norm": 1.1345668523426535,
"learning_rate": 3.921802386349057e-05,
"loss": 0.4677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.43786492943763733,
"step": 195,
"valid_targets_mean": 980.7,
"valid_targets_min": 284
},
{
"epoch": 1.3071895424836601,
"grad_norm": 1.377027240152856,
"learning_rate": 3.912514106129576e-05,
"loss": 0.458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5057384967803955,
"step": 200,
"valid_targets_mean": 731.2,
"valid_targets_min": 269
},
{
"epoch": 1.3398692810457518,
"grad_norm": 0.8297808392093082,
"learning_rate": 3.902716984792685e-05,
"loss": 0.4572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32464444637298584,
"step": 205,
"valid_targets_mean": 1297.6,
"valid_targets_min": 245
},
{
"epoch": 1.3725490196078431,
"grad_norm": 0.982008610679383,
"learning_rate": 3.8924136289482686e-05,
"loss": 0.4438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49354782700538635,
"step": 210,
"valid_targets_mean": 1354.9,
"valid_targets_min": 240
},
{
"epoch": 1.4052287581699345,
"grad_norm": 0.9583843270801939,
"learning_rate": 3.881606779894329e-05,
"loss": 0.476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4370919466018677,
"step": 215,
"valid_targets_mean": 1446.8,
"valid_targets_min": 271
},
{
"epoch": 1.4379084967320261,
"grad_norm": 0.981958627933756,
"learning_rate": 3.8702993128876455e-05,
"loss": 0.4424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4009607434272766,
"step": 220,
"valid_targets_mean": 1175.4,
"valid_targets_min": 244
},
{
"epoch": 1.4705882352941178,
"grad_norm": 1.0318349122572381,
"learning_rate": 3.858494236378785e-05,
"loss": 0.4517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34431201219558716,
"step": 225,
"valid_targets_mean": 848.2,
"valid_targets_min": 266
},
{
"epoch": 1.5032679738562091,
"grad_norm": 1.1926829814685804,
"learning_rate": 3.846194691211678e-05,
"loss": 0.4507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5188771486282349,
"step": 230,
"valid_targets_mean": 975.6,
"valid_targets_min": 265
},
{
"epoch": 1.5359477124183005,
"grad_norm": 1.3287118805218745,
"learning_rate": 3.8334039497879694e-05,
"loss": 0.4525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.43521052598953247,
"step": 235,
"valid_targets_mean": 755.8,
"valid_targets_min": 248
},
{
"epoch": 1.5686274509803921,
"grad_norm": 1.0912341313805085,
"learning_rate": 3.8201254151963664e-05,
"loss": 0.4507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4125358462333679,
"step": 240,
"valid_targets_mean": 935.4,
"valid_targets_min": 269
},
{
"epoch": 1.6013071895424837,
"grad_norm": 1.337631920998683,
"learning_rate": 3.8063626203072196e-05,
"loss": 0.4416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4946654736995697,
"step": 245,
"valid_targets_mean": 760.1,
"valid_targets_min": 229
},
{
"epoch": 1.6339869281045751,
"grad_norm": 0.7868890760626824,
"learning_rate": 3.792119226832569e-05,
"loss": 0.4301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3513905704021454,
"step": 250,
"valid_targets_mean": 1832.1,
"valid_targets_min": 302
},
{
"epoch": 1.6666666666666665,
"grad_norm": 1.1418891712589625,
"learning_rate": 3.7773990243519154e-05,
"loss": 0.4653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.43997740745544434,
"step": 255,
"valid_targets_mean": 1025.1,
"valid_targets_min": 259
},
{
"epoch": 1.6993464052287581,
"grad_norm": 1.1982818361901046,
"learning_rate": 3.762205929303969e-05,
"loss": 0.443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.523041844367981,
"step": 260,
"valid_targets_mean": 893.4,
"valid_targets_min": 243
},
{
"epoch": 1.7320261437908497,
"grad_norm": 1.4756814409685264,
"learning_rate": 3.746543983944646e-05,
"loss": 0.4581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4994322657585144,
"step": 265,
"valid_targets_mean": 681.1,
"valid_targets_min": 268
},
{
"epoch": 1.7647058823529411,
"grad_norm": 1.2251966207419902,
"learning_rate": 3.730417355271593e-05,
"loss": 0.4599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4771292209625244,
"step": 270,
"valid_targets_mean": 823.2,
"valid_targets_min": 262
},
{
"epoch": 1.7973856209150327,
"grad_norm": 0.8560952770470027,
"learning_rate": 3.713830333915517e-05,
"loss": 0.4125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3659961223602295,
"step": 275,
"valid_targets_mean": 1743.6,
"valid_targets_min": 277
},
{
"epoch": 1.8300653594771243,
"grad_norm": 1.2640409291326877,
"learning_rate": 3.6967873329986305e-05,
"loss": 0.3786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40237635374069214,
"step": 280,
"valid_targets_mean": 707.1,
"valid_targets_min": 235
},
{
"epoch": 1.8627450980392157,
"grad_norm": 1.6042954967809109,
"learning_rate": 3.679292886960497e-05,
"loss": 0.436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47581565380096436,
"step": 285,
"valid_targets_mean": 964.8,
"valid_targets_min": 248
},
{
"epoch": 1.8954248366013071,
"grad_norm": 1.5217135823702368,
"learning_rate": 3.661351650351608e-05,
"loss": 0.4431,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4720376431941986,
"step": 290,
"valid_targets_mean": 604.7,
"valid_targets_min": 236
},
{
"epoch": 1.9281045751633987,
"grad_norm": 1.4782923070866694,
"learning_rate": 3.642968396594995e-05,
"loss": 0.4515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5038082003593445,
"step": 295,
"valid_targets_mean": 618.2,
"valid_targets_min": 258
},
{
"epoch": 1.9607843137254903,
"grad_norm": 1.1511433477713358,
"learning_rate": 3.624148016716222e-05,
"loss": 0.4676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5166301727294922,
"step": 300,
"valid_targets_mean": 1017.7,
"valid_targets_min": 263
},
{
"epoch": 1.9934640522875817,
"grad_norm": 0.7376916907598469,
"learning_rate": 3.604895518042081e-05,
"loss": 0.4447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2603156566619873,
"step": 305,
"valid_targets_mean": 1753.6,
"valid_targets_min": 253
},
{
"epoch": 2.026143790849673,
"grad_norm": 1.1136650197465474,
"learning_rate": 3.585216022868356e-05,
"loss": 0.3317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3538415729999542,
"step": 310,
"valid_targets_mean": 857.9,
"valid_targets_min": 267
},
{
"epoch": 2.0588235294117645,
"grad_norm": 1.1254159871502543,
"learning_rate": 3.565114767096984e-05,
"loss": 0.3789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3277912735939026,
"step": 315,
"valid_targets_mean": 981.1,
"valid_targets_min": 253
},
{
"epoch": 2.0915032679738563,
"grad_norm": 1.3128993623554719,
"learning_rate": 3.544597098843001e-05,
"loss": 0.3653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37582293152809143,
"step": 320,
"valid_targets_mean": 853.1,
"valid_targets_min": 231
},
{
"epoch": 2.1241830065359477,
"grad_norm": 1.192620898541865,
"learning_rate": 3.5236684770116295e-05,
"loss": 0.3748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4059767723083496,
"step": 325,
"valid_targets_mean": 1326.2,
"valid_targets_min": 219
},
{
"epoch": 2.156862745098039,
"grad_norm": 1.0971126049747322,
"learning_rate": 3.502334469845886e-05,
"loss": 0.3466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33508726954460144,
"step": 330,
"valid_targets_mean": 1035.2,
"valid_targets_min": 269
},
{
"epoch": 2.189542483660131,
"grad_norm": 1.24119071299791,
"learning_rate": 3.4806007534451075e-05,
"loss": 0.3717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4131796360015869,
"step": 335,
"valid_targets_mean": 1173.8,
"valid_targets_min": 235
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.9019703404636363,
"learning_rate": 3.458473110254767e-05,
"loss": 0.3992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3299303650856018,
"step": 340,
"valid_targets_mean": 1531.8,
"valid_targets_min": 299
},
{
"epoch": 2.2549019607843137,
"grad_norm": 1.3137476700390396,
"learning_rate": 3.43595742752801e-05,
"loss": 0.3575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33964088559150696,
"step": 345,
"valid_targets_mean": 799.1,
"valid_targets_min": 285
},
{
"epoch": 2.287581699346405,
"grad_norm": 1.3766367222049374,
"learning_rate": 3.413059695759297e-05,
"loss": 0.3993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.41154414415359497,
"step": 350,
"valid_targets_mean": 812.0,
"valid_targets_min": 222
},
{
"epoch": 2.3202614379084965,
"grad_norm": 0.694606217367308,
"learning_rate": 3.389786007090581e-05,
"loss": 0.3489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22677206993103027,
"step": 355,
"valid_targets_mean": 1935.1,
"valid_targets_min": 252
},
{
"epoch": 2.3529411764705883,
"grad_norm": 1.6554928059464398,
"learning_rate": 3.3661425536904354e-05,
"loss": 0.3707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4165865480899811,
"step": 360,
"valid_targets_mean": 564.1,
"valid_targets_min": 242
},
{
"epoch": 2.3856209150326797,
"grad_norm": 1.092528086923834,
"learning_rate": 3.3421356261065805e-05,
"loss": 0.344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4011123776435852,
"step": 365,
"valid_targets_mean": 1385.9,
"valid_targets_min": 273
},
{
"epoch": 2.418300653594771,
"grad_norm": 1.0606643850646214,
"learning_rate": 3.317771611592222e-05,
"loss": 0.3509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3307228088378906,
"step": 370,
"valid_targets_mean": 1428.1,
"valid_targets_min": 320
},
{
"epoch": 2.450980392156863,
"grad_norm": 1.2049996354335248,
"learning_rate": 3.293056992406671e-05,
"loss": 0.3718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3773796856403351,
"step": 375,
"valid_targets_mean": 1160.4,
"valid_targets_min": 216
},
{
"epoch": 2.4836601307189543,
"grad_norm": 1.2899198397563096,
"learning_rate": 3.267998344090679e-05,
"loss": 0.3411,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38119545578956604,
"step": 380,
"valid_targets_mean": 775.1,
"valid_targets_min": 240
},
{
"epoch": 2.5163398692810457,
"grad_norm": 1.1656133845239687,
"learning_rate": 3.242602333716958e-05,
"loss": 0.3492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3696956932544708,
"step": 385,
"valid_targets_mean": 932.7,
"valid_targets_min": 262
},
{
"epoch": 2.549019607843137,
"grad_norm": 1.0524939861495455,
"learning_rate": 3.21687571811635e-05,
"loss": 0.3939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3500986695289612,
"step": 390,
"valid_targets_mean": 1204.0,
"valid_targets_min": 264
},
{
"epoch": 2.581699346405229,
"grad_norm": 1.5158107702474672,
"learning_rate": 3.190825342080109e-05,
"loss": 0.3632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.41287726163864136,
"step": 395,
"valid_targets_mean": 684.8,
"valid_targets_min": 246
},
{
"epoch": 2.6143790849673203,
"grad_norm": 1.0708264531248797,
"learning_rate": 3.164458136538789e-05,
"loss": 0.3384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2635033130645752,
"step": 400,
"valid_targets_mean": 931.7,
"valid_targets_min": 285
},
{
"epoch": 2.6470588235294117,
"grad_norm": 1.1926595250829215,
"learning_rate": 3.137781116718206e-05,
"loss": 0.3844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3295871317386627,
"step": 405,
"valid_targets_mean": 954.6,
"valid_targets_min": 238
},
{
"epoch": 2.6797385620915035,
"grad_norm": 1.4678135870422717,
"learning_rate": 3.110801380272975e-05,
"loss": 0.3714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40647825598716736,
"step": 410,
"valid_targets_mean": 701.2,
"valid_targets_min": 224
},
{
"epoch": 2.712418300653595,
"grad_norm": 1.2361572946977641,
"learning_rate": 3.0835261053981226e-05,
"loss": 0.374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.41568267345428467,
"step": 415,
"valid_targets_mean": 1122.1,
"valid_targets_min": 274
},
{
"epoch": 2.7450980392156863,
"grad_norm": 1.0225753867309941,
"learning_rate": 3.055962548919257e-05,
"loss": 0.3541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32175296545028687,
"step": 420,
"valid_targets_mean": 1174.4,
"valid_targets_min": 306
},
{
"epoch": 2.7777777777777777,
"grad_norm": 0.8940629697485692,
"learning_rate": 3.0281180443618337e-05,
"loss": 0.3622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30009371042251587,
"step": 425,
"valid_targets_mean": 1711.0,
"valid_targets_min": 274
},
{
"epoch": 2.810457516339869,
"grad_norm": 0.8560645122217327,
"learning_rate": 3.0000000000000004e-05,
"loss": 0.3183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2567868232727051,
"step": 430,
"valid_targets_mean": 1575.6,
"valid_targets_min": 181
},
{
"epoch": 2.843137254901961,
"grad_norm": 1.4792577436560297,
"learning_rate": 2.9716158968855665e-05,
"loss": 0.3729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3718124330043793,
"step": 435,
"valid_targets_mean": 682.1,
"valid_targets_min": 256
},
{
"epoch": 2.8758169934640523,
"grad_norm": 0.7873403934813155,
"learning_rate": 2.9429732868576e-05,
"loss": 0.3203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22500234842300415,
"step": 440,
"valid_targets_mean": 1949.4,
"valid_targets_min": 243
},
{
"epoch": 2.9084967320261437,
"grad_norm": 1.31414956514437,
"learning_rate": 2.9140797905331964e-05,
"loss": 0.3724,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3907439410686493,
"step": 445,
"valid_targets_mean": 806.3,
"valid_targets_min": 277
},
{
"epoch": 2.9411764705882355,
"grad_norm": 1.5773260718196254,
"learning_rate": 2.884943095279946e-05,
"loss": 0.3809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.44405168294906616,
"step": 450,
"valid_targets_mean": 660.8,
"valid_targets_min": 274
},
{
"epoch": 2.973856209150327,
"grad_norm": 1.1417920727842683,
"learning_rate": 2.8555709531706423e-05,
"loss": 0.3911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2857981026172638,
"step": 455,
"valid_targets_mean": 842.0,
"valid_targets_min": 255
},
{
"epoch": 3.0065359477124183,
"grad_norm": 1.4849160766480622,
"learning_rate": 2.825971178920777e-05,
"loss": 0.3648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2776503562927246,
"step": 460,
"valid_targets_mean": 744.2,
"valid_targets_min": 238
},
{
"epoch": 3.0392156862745097,
"grad_norm": 1.6833400262584757,
"learning_rate": 2.796151647809364e-05,
"loss": 0.2969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3672228455543518,
"step": 465,
"valid_targets_mean": 619.8,
"valid_targets_min": 253
},
{
"epoch": 3.0718954248366015,
"grad_norm": 1.5789185393237866,
"learning_rate": 2.7661202935836536e-05,
"loss": 0.3121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3267885148525238,
"step": 470,
"valid_targets_mean": 747.2,
"valid_targets_min": 253
},
{
"epoch": 3.104575163398693,
"grad_norm": 1.525005567408752,
"learning_rate": 2.73588510634829e-05,
"loss": 0.3271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3514612913131714,
"step": 475,
"valid_targets_mean": 866.7,
"valid_targets_min": 291
},
{
"epoch": 3.1372549019607843,
"grad_norm": 1.2913239483066061,
"learning_rate": 2.7054541304394736e-05,
"loss": 0.3019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2418668270111084,
"step": 480,
"valid_targets_mean": 889.4,
"valid_targets_min": 243
},
{
"epoch": 3.1699346405228757,
"grad_norm": 1.2082673756279816,
"learning_rate": 2.6748354622846962e-05,
"loss": 0.251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24368639290332794,
"step": 485,
"valid_targets_mean": 1031.2,
"valid_targets_min": 289
},
{
"epoch": 3.2026143790849675,
"grad_norm": 1.1922254782261243,
"learning_rate": 2.6440372482486127e-05,
"loss": 0.2652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2941184639930725,
"step": 490,
"valid_targets_mean": 1239.8,
"valid_targets_min": 270
},
{
"epoch": 3.235294117647059,
"grad_norm": 1.6204743095393073,
"learning_rate": 2.613067682465631e-05,
"loss": 0.28,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34600523114204407,
"step": 495,
"valid_targets_mean": 727.4,
"valid_targets_min": 259
},
{
"epoch": 3.2679738562091503,
"grad_norm": 1.7067005151903063,
"learning_rate": 2.5819350046597927e-05,
"loss": 0.297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33082401752471924,
"step": 500,
"valid_targets_mean": 711.6,
"valid_targets_min": 266
},
{
"epoch": 3.3006535947712417,
"grad_norm": 1.344918045352721,
"learning_rate": 2.55064749795252e-05,
"loss": 0.3077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29931801557540894,
"step": 505,
"valid_targets_mean": 1001.1,
"valid_targets_min": 219
},
{
"epoch": 3.3333333333333335,
"grad_norm": 1.5947027063326324,
"learning_rate": 2.519213486658819e-05,
"loss": 0.2939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3358372151851654,
"step": 510,
"valid_targets_mean": 640.4,
"valid_targets_min": 257
},
{
"epoch": 3.366013071895425,
"grad_norm": 2.1390767780943434,
"learning_rate": 2.4876413340725244e-05,
"loss": 0.3398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3361467719078064,
"step": 515,
"valid_targets_mean": 840.6,
"valid_targets_min": 290
},
{
"epoch": 3.3986928104575163,
"grad_norm": 1.4750832729481385,
"learning_rate": 2.4559394402411703e-05,
"loss": 0.3096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28374865651130676,
"step": 520,
"valid_targets_mean": 662.6,
"valid_targets_min": 258
},
{
"epoch": 3.431372549019608,
"grad_norm": 1.1883189575628739,
"learning_rate": 2.4241162397310836e-05,
"loss": 0.2718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.288798451423645,
"step": 525,
"valid_targets_mean": 1315.8,
"valid_targets_min": 253
},
{
"epoch": 3.4640522875816995,
"grad_norm": 1.4617369234648558,
"learning_rate": 2.3921801993832964e-05,
"loss": 0.2899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35622355341911316,
"step": 530,
"valid_targets_mean": 895.0,
"valid_targets_min": 292
},
{
"epoch": 3.496732026143791,
"grad_norm": 1.0468449297189089,
"learning_rate": 2.3601398160608667e-05,
"loss": 0.2905,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2579011917114258,
"step": 535,
"valid_targets_mean": 1593.1,
"valid_targets_min": 261
},
{
"epoch": 3.5294117647058822,
"grad_norm": 1.7782119793681848,
"learning_rate": 2.3280036143882145e-05,
"loss": 0.3064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.340090811252594,
"step": 540,
"valid_targets_mean": 528.3,
"valid_targets_min": 285
},
{
"epoch": 3.5620915032679736,
"grad_norm": 1.1956892052285935,
"learning_rate": 2.2957801444830684e-05,
"loss": 0.2679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2931376099586487,
"step": 545,
"valid_targets_mean": 1387.6,
"valid_targets_min": 280
},
{
"epoch": 3.5947712418300655,
"grad_norm": 1.330594535756061,
"learning_rate": 2.2634779796816377e-05,
"loss": 0.329,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29296964406967163,
"step": 550,
"valid_targets_mean": 1105.3,
"valid_targets_min": 279
},
{
"epoch": 3.627450980392157,
"grad_norm": 0.9003334164561678,
"learning_rate": 2.2311057142575953e-05,
"loss": 0.2683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1992267370223999,
"step": 555,
"valid_targets_mean": 1443.8,
"valid_targets_min": 236
},
{
"epoch": 3.6601307189542482,
"grad_norm": 1.5529200780881063,
"learning_rate": 2.198671961135498e-05,
"loss": 0.2962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3122693598270416,
"step": 560,
"valid_targets_mean": 701.2,
"valid_targets_min": 240
},
{
"epoch": 3.69281045751634,
"grad_norm": 1.5208864513474185,
"learning_rate": 2.166185349599245e-05,
"loss": 0.2646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26625901460647583,
"step": 565,
"valid_targets_mean": 877.5,
"valid_targets_min": 260
},
{
"epoch": 3.7254901960784315,
"grad_norm": 1.0194886982642561,
"learning_rate": 2.1336545229961772e-05,
"loss": 0.3037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25733599066734314,
"step": 570,
"valid_targets_mean": 1400.1,
"valid_targets_min": 231
},
{
"epoch": 3.758169934640523,
"grad_norm": 1.1707112854047086,
"learning_rate": 2.1010881364374404e-05,
"loss": 0.323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35556066036224365,
"step": 575,
"valid_targets_mean": 1315.2,
"valid_targets_min": 230
},
{
"epoch": 3.7908496732026142,
"grad_norm": 1.0812520101554173,
"learning_rate": 2.0684948544952217e-05,
"loss": 0.2629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21427355706691742,
"step": 580,
"valid_targets_mean": 978.9,
"valid_targets_min": 196
},
{
"epoch": 3.8235294117647056,
"grad_norm": 1.7609304063968088,
"learning_rate": 2.0358833488974556e-05,
"loss": 0.3031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28584054112434387,
"step": 585,
"valid_targets_mean": 803.2,
"valid_targets_min": 251
},
{
"epoch": 3.8562091503267975,
"grad_norm": 1.372524503176478,
"learning_rate": 2.0032622962206428e-05,
"loss": 0.3152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25724029541015625,
"step": 590,
"valid_targets_mean": 837.6,
"valid_targets_min": 240
},
{
"epoch": 3.888888888888889,
"grad_norm": 1.551633309136953,
"learning_rate": 1.9706403755813672e-05,
"loss": 0.2989,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3098876476287842,
"step": 595,
"valid_targets_mean": 778.6,
"valid_targets_min": 252
},
{
"epoch": 3.9215686274509802,
"grad_norm": 1.4200996111858124,
"learning_rate": 1.9380262663271407e-05,
"loss": 0.2982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28785258531570435,
"step": 600,
"valid_targets_mean": 1276.4,
"valid_targets_min": 246
},
{
"epoch": 3.954248366013072,
"grad_norm": 1.1423875399407577,
"learning_rate": 1.9054286457271892e-05,
"loss": 0.2867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26374930143356323,
"step": 605,
"valid_targets_mean": 1187.8,
"valid_targets_min": 262
},
{
"epoch": 3.9869281045751634,
"grad_norm": 1.047432790499157,
"learning_rate": 1.8728561866637886e-05,
"loss": 0.3039,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22139273583889008,
"step": 610,
"valid_targets_mean": 1173.8,
"valid_targets_min": 241
},
{
"epoch": 4.019607843137255,
"grad_norm": 1.1536745154975416,
"learning_rate": 1.840317555324764e-05,
"loss": 0.2386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24080944061279297,
"step": 615,
"valid_targets_mean": 1125.4,
"valid_targets_min": 262
},
{
"epoch": 4.052287581699346,
"grad_norm": 2.128292235679121,
"learning_rate": 1.8078214088977817e-05,
"loss": 0.2196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2866114377975464,
"step": 620,
"valid_targets_mean": 497.8,
"valid_targets_min": 258
},
{
"epoch": 4.084967320261438,
"grad_norm": 1.0323930725322927,
"learning_rate": 1.7753763932670257e-05,
"loss": 0.2553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18582549691200256,
"step": 625,
"valid_targets_mean": 1817.0,
"valid_targets_min": 269
},
{
"epoch": 4.117647058823529,
"grad_norm": 1.296142459740979,
"learning_rate": 1.742991140712881e-05,
"loss": 0.2434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1894800066947937,
"step": 630,
"valid_targets_mean": 1049.4,
"valid_targets_min": 272
},
{
"epoch": 4.150326797385621,
"grad_norm": 1.710406042987915,
"learning_rate": 1.7106742676152454e-05,
"loss": 0.2344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23869173228740692,
"step": 635,
"valid_targets_mean": 681.4,
"valid_targets_min": 236
},
{
"epoch": 4.183006535947713,
"grad_norm": 1.4576865063285587,
"learning_rate": 1.678434372161064e-05,
"loss": 0.2277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18657907843589783,
"step": 640,
"valid_targets_mean": 978.1,
"valid_targets_min": 219
},
{
"epoch": 4.215686274509804,
"grad_norm": 1.0562589689629556,
"learning_rate": 1.646280032056704e-05,
"loss": 0.2179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1481604427099228,
"step": 645,
"valid_targets_mean": 1139.1,
"valid_targets_min": 235
},
{
"epoch": 4.248366013071895,
"grad_norm": 1.4916205632940045,
"learning_rate": 1.6142198022457853e-05,
"loss": 0.233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24131783843040466,
"step": 650,
"valid_targets_mean": 1026.0,
"valid_targets_min": 308
},
{
"epoch": 4.281045751633987,
"grad_norm": 1.659388286048901,
"learning_rate": 1.5822622126330597e-05,
"loss": 0.2493,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2790229022502899,
"step": 655,
"valid_targets_mean": 866.1,
"valid_targets_min": 289
},
{
"epoch": 4.313725490196078,
"grad_norm": 1.73499163192446,
"learning_rate": 1.550415765814955e-05,
"loss": 0.2102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19306321442127228,
"step": 660,
"valid_targets_mean": 854.9,
"valid_targets_min": 258
},
{
"epoch": 4.34640522875817,
"grad_norm": 1.7154850302464963,
"learning_rate": 1.5186889348173857e-05,
"loss": 0.2354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25681236386299133,
"step": 665,
"valid_targets_mean": 689.3,
"valid_targets_min": 217
},
{
"epoch": 4.379084967320262,
"grad_norm": 1.488660446451009,
"learning_rate": 1.487090160841433e-05,
"loss": 0.2582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2581811547279358,
"step": 670,
"valid_targets_mean": 959.4,
"valid_targets_min": 278
},
{
"epoch": 4.411764705882353,
"grad_norm": 1.1881498761592073,
"learning_rate": 1.4556278510174827e-05,
"loss": 0.2262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19084610044956207,
"step": 675,
"valid_targets_mean": 1209.3,
"valid_targets_min": 226
},
{
"epoch": 4.444444444444445,
"grad_norm": 1.2552326944423986,
"learning_rate": 1.424310376168441e-05,
"loss": 0.238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1857946366071701,
"step": 680,
"valid_targets_mean": 1078.2,
"valid_targets_min": 248
},
{
"epoch": 4.477124183006536,
"grad_norm": 1.4969283216444473,
"learning_rate": 1.3931460685826022e-05,
"loss": 0.2424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22742964327335358,
"step": 685,
"valid_targets_mean": 1085.7,
"valid_targets_min": 219
},
{
"epoch": 4.509803921568627,
"grad_norm": 1.154906103363336,
"learning_rate": 1.3621432197967664e-05,
"loss": 0.2228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1935591995716095,
"step": 690,
"valid_targets_mean": 1364.5,
"valid_targets_min": 287
},
{
"epoch": 4.542483660130719,
"grad_norm": 1.81580756139067,
"learning_rate": 1.3313100783902097e-05,
"loss": 0.2396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2677111327648163,
"step": 695,
"valid_targets_mean": 630.1,
"valid_targets_min": 243
},
{
"epoch": 4.57516339869281,
"grad_norm": 1.4394912824181447,
"learning_rate": 1.3006548477900735e-05,
"loss": 0.2442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2861871123313904,
"step": 700,
"valid_targets_mean": 1012.4,
"valid_targets_min": 258
},
{
"epoch": 4.607843137254902,
"grad_norm": 1.7782439408173458,
"learning_rate": 1.270185684088771e-05,
"loss": 0.221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2639431953430176,
"step": 705,
"valid_targets_mean": 893.2,
"valid_targets_min": 218
},
{
"epoch": 4.640522875816993,
"grad_norm": 1.3770119209689564,
"learning_rate": 1.2399106938739903e-05,
"loss": 0.2347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22172415256500244,
"step": 710,
"valid_targets_mean": 898.8,
"valid_targets_min": 285
},
{
"epoch": 4.673202614379085,
"grad_norm": 1.5667989718671909,
"learning_rate": 1.2098379320718633e-05,
"loss": 0.2288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25639814138412476,
"step": 715,
"valid_targets_mean": 774.8,
"valid_targets_min": 266
},
{
"epoch": 4.705882352941177,
"grad_norm": 1.6863304630924782,
"learning_rate": 1.179975399803881e-05,
"loss": 0.2324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2574901878833771,
"step": 720,
"valid_targets_mean": 951.9,
"valid_targets_min": 235
},
{
"epoch": 4.738562091503268,
"grad_norm": 1.835300847793114,
"learning_rate": 1.1503310422581286e-05,
"loss": 0.2347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23203280568122864,
"step": 725,
"valid_targets_mean": 602.1,
"valid_targets_min": 270
},
{
"epoch": 4.771241830065359,
"grad_norm": 1.4010835866515463,
"learning_rate": 1.1209127465753978e-05,
"loss": 0.2208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2546510100364685,
"step": 730,
"valid_targets_mean": 1022.6,
"valid_targets_min": 225
},
{
"epoch": 4.803921568627451,
"grad_norm": 1.6323587959134847,
"learning_rate": 1.0917283397507392e-05,
"loss": 0.2089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2114773392677307,
"step": 735,
"valid_targets_mean": 891.1,
"valid_targets_min": 246
},
{
"epoch": 4.836601307189542,
"grad_norm": 1.8162227837000073,
"learning_rate": 1.0627855865510294e-05,
"loss": 0.2332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.259385347366333,
"step": 740,
"valid_targets_mean": 751.1,
"valid_targets_min": 295
},
{
"epoch": 4.8692810457516345,
"grad_norm": 1.5458544536658743,
"learning_rate": 1.034092187449082e-05,
"loss": 0.2475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1714608073234558,
"step": 745,
"valid_targets_mean": 771.1,
"valid_targets_min": 246
},
{
"epoch": 4.901960784313726,
"grad_norm": 1.3785541410906907,
"learning_rate": 1.0056557765748684e-05,
"loss": 0.2137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24545931816101074,
"step": 750,
"valid_targets_mean": 995.0,
"valid_targets_min": 267
},
{
"epoch": 4.934640522875817,
"grad_norm": 1.3403750984818765,
"learning_rate": 9.774839196843953e-06,
"loss": 0.2251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25746211409568787,
"step": 755,
"valid_targets_mean": 1251.8,
"valid_targets_min": 222
},
{
"epoch": 4.967320261437909,
"grad_norm": 1.619409098874215,
"learning_rate": 9.49584112146765e-06,
"loss": 0.2221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2491072118282318,
"step": 760,
"valid_targets_mean": 717.7,
"valid_targets_min": 219
},
{
"epoch": 5.0,
"grad_norm": 1.532384895615285,
"learning_rate": 9.21963776949969e-06,
"loss": 0.2197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2333248406648636,
"step": 765,
"valid_targets_mean": 766.5,
"valid_targets_min": 240
},
{
"epoch": 5.032679738562091,
"grad_norm": 1.7276785744874288,
"learning_rate": 8.946302627259363e-06,
"loss": 0.1998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20002366602420807,
"step": 770,
"valid_targets_mean": 606.7,
"valid_targets_min": 255
},
{
"epoch": 5.065359477124183,
"grad_norm": 1.3996086672565842,
"learning_rate": 8.67590841795366e-06,
"loss": 0.1893,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16175302863121033,
"step": 775,
"valid_targets_mean": 969.9,
"valid_targets_min": 257
},
{
"epoch": 5.098039215686274,
"grad_norm": 1.3153977909254402,
"learning_rate": 8.408527082328605e-06,
"loss": 0.1854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1600106954574585,
"step": 780,
"valid_targets_mean": 1062.1,
"valid_targets_min": 257
},
{
"epoch": 5.130718954248366,
"grad_norm": 1.5111274969532478,
"learning_rate": 8.144229759528835e-06,
"loss": 0.1881,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15232618153095245,
"step": 785,
"valid_targets_mean": 868.0,
"valid_targets_min": 243
},
{
"epoch": 5.163398692810458,
"grad_norm": 1.30810433786,
"learning_rate": 7.883086768170369e-06,
"loss": 0.1847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15596047043800354,
"step": 790,
"valid_targets_mean": 1220.3,
"valid_targets_min": 245
},
{
"epoch": 5.196078431372549,
"grad_norm": 1.2979766762023117,
"learning_rate": 7.625167587631732e-06,
"loss": 0.2001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1786264181137085,
"step": 795,
"valid_targets_mean": 1454.9,
"valid_targets_min": 266
},
{
"epoch": 5.228758169934641,
"grad_norm": 1.5405276569855957,
"learning_rate": 7.370540839568372e-06,
"loss": 0.1901,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15867263078689575,
"step": 800,
"valid_targets_mean": 666.9,
"valid_targets_min": 240
},
{
"epoch": 5.261437908496732,
"grad_norm": 1.5240591186359513,
"learning_rate": 7.119274269655265e-06,
"loss": 0.2073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2132365107536316,
"step": 805,
"valid_targets_mean": 1250.9,
"valid_targets_min": 264
},
{
"epoch": 5.294117647058823,
"grad_norm": 1.703110559867695,
"learning_rate": 6.87143472956256e-06,
"loss": 0.1913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1889723688364029,
"step": 810,
"valid_targets_mean": 872.8,
"valid_targets_min": 253
},
{
"epoch": 5.326797385620915,
"grad_norm": 1.3548680024706885,
"learning_rate": 6.627088159169146e-06,
"loss": 0.1651,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16059917211532593,
"step": 815,
"valid_targets_mean": 1151.3,
"valid_targets_min": 272
},
{
"epoch": 5.359477124183006,
"grad_norm": 1.766434503841294,
"learning_rate": 6.3862995690187505e-06,
"loss": 0.1777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1978355199098587,
"step": 820,
"valid_targets_mean": 688.6,
"valid_targets_min": 242
},
{
"epoch": 5.392156862745098,
"grad_norm": 1.681475454731881,
"learning_rate": 6.1491330230232944e-06,
"loss": 0.2005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21753373742103577,
"step": 825,
"valid_targets_mean": 723.4,
"valid_targets_min": 230
},
{
"epoch": 5.42483660130719,
"grad_norm": 1.436963945472817,
"learning_rate": 5.915651621418172e-06,
"loss": 0.1839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16063036024570465,
"step": 830,
"valid_targets_mean": 1000.5,
"valid_targets_min": 281
},
{
"epoch": 5.457516339869281,
"grad_norm": 1.6529262568452745,
"learning_rate": 5.6859174839738576e-06,
"loss": 0.2127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22638444602489471,
"step": 835,
"valid_targets_mean": 919.0,
"valid_targets_min": 226
},
{
"epoch": 5.490196078431373,
"grad_norm": 1.3361133443873117,
"learning_rate": 5.459991733468375e-06,
"loss": 0.1693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1512422263622284,
"step": 840,
"valid_targets_mean": 1104.3,
"valid_targets_min": 289
},
{
"epoch": 5.522875816993464,
"grad_norm": 1.4473821491885344,
"learning_rate": 5.237934479425091e-06,
"loss": 0.2142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1956217885017395,
"step": 845,
"valid_targets_mean": 1222.6,
"valid_targets_min": 270
},
{
"epoch": 5.555555555555555,
"grad_norm": 2.161746051226456,
"learning_rate": 5.019804802120027e-06,
"loss": 0.2004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2561233341693878,
"step": 850,
"valid_targets_mean": 559.7,
"valid_targets_min": 217
},
{
"epoch": 5.588235294117647,
"grad_norm": 1.7693395053973981,
"learning_rate": 4.805660736863023e-06,
"loss": 0.1831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19513806700706482,
"step": 855,
"valid_targets_mean": 617.1,
"valid_targets_min": 287
},
{
"epoch": 5.620915032679738,
"grad_norm": 1.3552287003331067,
"learning_rate": 4.595559258556963e-06,
"loss": 0.188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1493796408176422,
"step": 860,
"valid_targets_mean": 1039.1,
"valid_targets_min": 222
},
{
"epoch": 5.65359477124183,
"grad_norm": 1.3819829355501174,
"learning_rate": 4.389556266539081e-06,
"loss": 0.2014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17469432950019836,
"step": 865,
"valid_targets_mean": 1213.0,
"valid_targets_min": 290
},
{
"epoch": 5.686274509803922,
"grad_norm": 1.1282318414623302,
"learning_rate": 4.187706569708472e-06,
"loss": 0.2089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17088523507118225,
"step": 870,
"valid_targets_mean": 1821.5,
"valid_targets_min": 255
},
{
"epoch": 5.718954248366013,
"grad_norm": 1.5706108442305917,
"learning_rate": 3.990063871943681e-06,
"loss": 0.1865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20732146501541138,
"step": 875,
"valid_targets_mean": 747.2,
"valid_targets_min": 257
},
{
"epoch": 5.751633986928105,
"grad_norm": 1.2522615185034307,
"learning_rate": 3.796680757814344e-06,
"loss": 0.2047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16442234814167023,
"step": 880,
"valid_targets_mean": 1394.4,
"valid_targets_min": 257
},
{
"epoch": 5.784313725490196,
"grad_norm": 1.5816802112242634,
"learning_rate": 3.6076086785905708e-06,
"loss": 0.1978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20470181107521057,
"step": 885,
"valid_targets_mean": 890.7,
"valid_targets_min": 274
},
{
"epoch": 5.816993464052287,
"grad_norm": 1.7816275222819353,
"learning_rate": 3.4228979385539153e-06,
"loss": 0.1838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2052854597568512,
"step": 890,
"valid_targets_mean": 843.9,
"valid_targets_min": 274
},
{
"epoch": 5.849673202614379,
"grad_norm": 2.7820140943526086,
"learning_rate": 3.242597681613471e-06,
"loss": 0.2068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3125446140766144,
"step": 895,
"valid_targets_mean": 460.5,
"valid_targets_min": 261
},
{
"epoch": 5.882352941176471,
"grad_norm": 1.089471821874377,
"learning_rate": 3.0667558782306782e-06,
"loss": 0.1695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12326781451702118,
"step": 900,
"valid_targets_mean": 1121.2,
"valid_targets_min": 219
},
{
"epoch": 5.915032679738562,
"grad_norm": 1.7665649158055792,
"learning_rate": 2.895419312656409e-06,
"loss": 0.1708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19156697392463684,
"step": 905,
"valid_targets_mean": 633.0,
"valid_targets_min": 257
},
{
"epoch": 5.947712418300654,
"grad_norm": 1.6688900557662307,
"learning_rate": 2.7286335704835788e-06,
"loss": 0.1935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18692585825920105,
"step": 910,
"valid_targets_mean": 1061.8,
"valid_targets_min": 243
},
{
"epoch": 5.980392156862745,
"grad_norm": 1.7410431833200393,
"learning_rate": 2.566443026518692e-06,
"loss": 0.2118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2652069926261902,
"step": 915,
"valid_targets_mean": 920.4,
"valid_targets_min": 247
},
{
"epoch": 6.0130718954248366,
"grad_norm": 1.5421028631124705,
"learning_rate": 2.4088908329755678e-06,
"loss": 0.187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17312213778495789,
"step": 920,
"valid_targets_mean": 767.6,
"valid_targets_min": 259
},
{
"epoch": 6.045751633986928,
"grad_norm": 1.5991633631226976,
"learning_rate": 2.256018907994284e-06,
"loss": 0.139,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17524030804634094,
"step": 925,
"valid_targets_mean": 662.6,
"valid_targets_min": 245
},
{
"epoch": 6.078431372549019,
"grad_norm": 1.3250031504324633,
"learning_rate": 2.107867924488509e-06,
"loss": 0.1548,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12431657314300537,
"step": 930,
"valid_targets_mean": 958.3,
"valid_targets_min": 218
},
{
"epoch": 6.111111111111111,
"grad_norm": 1.2101774128898037,
"learning_rate": 1.9644772993241166e-06,
"loss": 0.1588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1561109721660614,
"step": 935,
"valid_targets_mean": 1136.2,
"valid_targets_min": 318
},
{
"epoch": 6.143790849673203,
"grad_norm": 1.925356995762324,
"learning_rate": 1.8258851828319678e-06,
"loss": 0.1741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17998185753822327,
"step": 940,
"valid_targets_mean": 535.2,
"valid_targets_min": 258
},
{
"epoch": 6.176470588235294,
"grad_norm": 1.737151480718245,
"learning_rate": 1.692128448657695e-06,
"loss": 0.1674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18432410061359406,
"step": 945,
"valid_targets_mean": 649.9,
"valid_targets_min": 235
},
{
"epoch": 6.209150326797386,
"grad_norm": 1.4475105159337607,
"learning_rate": 1.5632426839511494e-06,
"loss": 0.1791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1958422064781189,
"step": 950,
"valid_targets_mean": 1076.3,
"valid_targets_min": 284
},
{
"epoch": 6.241830065359477,
"grad_norm": 1.846609568443287,
"learning_rate": 1.4392621798981154e-06,
"loss": 0.1831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20268788933753967,
"step": 955,
"valid_targets_mean": 756.5,
"valid_targets_min": 248
},
{
"epoch": 6.2745098039215685,
"grad_norm": 2.1487512775179427,
"learning_rate": 1.3202199225968481e-06,
"loss": 0.1799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21326568722724915,
"step": 960,
"valid_targets_mean": 772.2,
"valid_targets_min": 277
},
{
"epoch": 6.30718954248366,
"grad_norm": 1.9580761806861307,
"learning_rate": 1.2061475842818337e-06,
"loss": 0.178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19729763269424438,
"step": 965,
"valid_targets_mean": 605.1,
"valid_targets_min": 246
},
{
"epoch": 6.339869281045751,
"grad_norm": 2.549605085746636,
"learning_rate": 1.0970755148971057e-06,
"loss": 0.1761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27497926354408264,
"step": 970,
"valid_targets_mean": 525.9,
"valid_targets_min": 226
},
{
"epoch": 6.372549019607844,
"grad_norm": 1.9954702320448223,
"learning_rate": 9.930327340213908e-07,
"loss": 0.1725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24627891182899475,
"step": 975,
"valid_targets_mean": 668.2,
"valid_targets_min": 219
},
{
"epoch": 6.405228758169935,
"grad_norm": 1.4583942689664242,
"learning_rate": 8.940469231471893e-07,
"loss": 0.163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15345948934555054,
"step": 980,
"valid_targets_mean": 996.8,
"valid_targets_min": 231
},
{
"epoch": 6.437908496732026,
"grad_norm": 1.2627907187503407,
"learning_rate": 8.001444183158602e-07,
"loss": 0.1796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19024597108364105,
"step": 985,
"valid_targets_mean": 1571.8,
"valid_targets_min": 258
},
{
"epoch": 6.470588235294118,
"grad_norm": 1.5109741084844859,
"learning_rate": 7.1135020311071e-07,
"loss": 0.1758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15086877346038818,
"step": 990,
"valid_targets_mean": 737.7,
"valid_targets_min": 226
},
{
"epoch": 6.503267973856209,
"grad_norm": 1.6130378471299882,
"learning_rate": 6.276879020098769e-07,
"loss": 0.1753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19527272880077362,
"step": 995,
"valid_targets_mean": 776.8,
"valid_targets_min": 276
},
{
"epoch": 6.5359477124183005,
"grad_norm": 1.0872457293239366,
"learning_rate": 5.491797741008232e-07,
"loss": 0.1872,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14185872673988342,
"step": 1000,
"valid_targets_mean": 1552.2,
"valid_targets_min": 308
},
{
"epoch": 6.568627450980392,
"grad_norm": 1.5205248767147233,
"learning_rate": 4.758467071581363e-07,
"loss": 0.1598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15201915800571442,
"step": 1005,
"valid_targets_mean": 837.2,
"valid_targets_min": 262
},
{
"epoch": 6.601307189542483,
"grad_norm": 1.5480417864738425,
"learning_rate": 4.077082120861309e-07,
"loss": 0.1659,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18016156554222107,
"step": 1010,
"valid_targets_mean": 948.2,
"valid_targets_min": 259
},
{
"epoch": 6.633986928104575,
"grad_norm": 1.4118892710756072,
"learning_rate": 3.4478241772780695e-07,
"loss": 0.1596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15900777280330658,
"step": 1015,
"valid_targets_mean": 1190.9,
"valid_targets_min": 283
},
{
"epoch": 6.666666666666667,
"grad_norm": 1.7202119833416936,
"learning_rate": 2.8708606604151757e-07,
"loss": 0.1885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19836293160915375,
"step": 1020,
"valid_targets_mean": 671.6,
"valid_targets_min": 242
},
{
"epoch": 6.699346405228758,
"grad_norm": 1.6749245084847142,
"learning_rate": 2.346345076466272e-07,
"loss": 0.1514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17178763449192047,
"step": 1025,
"valid_targets_mean": 923.9,
"valid_targets_min": 196
},
{
"epoch": 6.73202614379085,
"grad_norm": 1.3050532980878529,
"learning_rate": 1.8744169773932784e-07,
"loss": 0.1579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1522129327058792,
"step": 1030,
"valid_targets_mean": 1047.8,
"valid_targets_min": 273
},
{
"epoch": 6.764705882352941,
"grad_norm": 1.7719572564362351,
"learning_rate": 1.4552019237976e-07,
"loss": 0.1979,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22099488973617554,
"step": 1035,
"valid_targets_mean": 829.3,
"valid_targets_min": 288
},
{
"epoch": 6.7973856209150325,
"grad_norm": 1.4013677157515687,
"learning_rate": 1.0888114515134274e-07,
"loss": 0.1934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17296719551086426,
"step": 1040,
"valid_targets_mean": 1032.6,
"valid_targets_min": 255
},
{
"epoch": 6.830065359477124,
"grad_norm": 1.4687939649728274,
"learning_rate": 7.753430419328301e-08,
"loss": 0.1638,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16742470860481262,
"step": 1045,
"valid_targets_mean": 1204.0,
"valid_targets_min": 271
},
{
"epoch": 6.862745098039216,
"grad_norm": 1.3669411898643498,
"learning_rate": 5.1488009606979195e-08,
"loss": 0.1568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13698092103004456,
"step": 1050,
"valid_targets_mean": 980.1,
"valid_targets_min": 276
},
{
"epoch": 6.895424836601308,
"grad_norm": 1.119666687197484,
"learning_rate": 3.074919123708275e-08,
"loss": 0.191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13298380374908447,
"step": 1055,
"valid_targets_mean": 1280.6,
"valid_targets_min": 238
},
{
"epoch": 6.928104575163399,
"grad_norm": 1.5063802393305312,
"learning_rate": 1.5323366827737496e-08,
"loss": 0.193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17369940876960754,
"step": 1060,
"valid_targets_mean": 982.8,
"valid_targets_min": 219
},
{
"epoch": 6.96078431372549,
"grad_norm": 1.614719027777907,
"learning_rate": 5.2146405545427935e-09,
"loss": 0.1605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1444833129644394,
"step": 1065,
"valid_targets_mean": 855.0,
"valid_targets_min": 235
},
{
"epoch": 6.993464052287582,
"grad_norm": 1.435878529641309,
"learning_rate": 4.2570193260482727e-10,
"loss": 0.1657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.153692826628685,
"step": 1070,
"valid_targets_mean": 1271.9,
"valid_targets_min": 230
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21700537204742432,
"step": 1071,
"total_flos": 87884495978496.0,
"train_loss": 0.3235391679834696,
"train_runtime": 3658.4254,
"train_samples_per_second": 4.669,
"train_steps_per_second": 0.293,
"valid_targets_mean": 902.0,
"valid_targets_min": 329
}
],
"logging_steps": 5,
"max_steps": 1071,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 87884495978496.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}