| { |
| "best_metric": 0.6407684683799744, |
| "best_model_checkpoint": "/gs/bs/tga-t2glrlab-1/lyu/t5/run4-flan-t5-large-base/checkpoint-672", |
| "epoch": 2.7008289374529015, |
| "eval_steps": 16, |
| "global_step": 672, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.15875867009162903, |
| "learning_rate": 0.0009870967741935483, |
| "loss": 0.9746, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_loss": 0.7603664994239807, |
| "eval_runtime": 4.5631, |
| "eval_samples_per_second": 420.983, |
| "eval_steps_per_second": 26.517, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.10888980329036713, |
| "learning_rate": 0.0009741935483870968, |
| "loss": 0.7966, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_loss": 0.7332449555397034, |
| "eval_runtime": 4.6012, |
| "eval_samples_per_second": 417.502, |
| "eval_steps_per_second": 26.298, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.17990568280220032, |
| "learning_rate": 0.0009612903225806452, |
| "loss": 0.7655, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_loss": 0.7393877506256104, |
| "eval_runtime": 4.6014, |
| "eval_samples_per_second": 417.486, |
| "eval_steps_per_second": 26.297, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.11761835217475891, |
| "learning_rate": 0.0009483870967741936, |
| "loss": 0.8534, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_loss": 0.7214094400405884, |
| "eval_runtime": 4.6156, |
| "eval_samples_per_second": 416.194, |
| "eval_steps_per_second": 26.215, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.112620510160923, |
| "learning_rate": 0.0009354838709677419, |
| "loss": 0.7623, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_loss": 0.7035090327262878, |
| "eval_runtime": 4.602, |
| "eval_samples_per_second": 417.423, |
| "eval_steps_per_second": 26.293, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.14100195467472076, |
| "learning_rate": 0.0009225806451612904, |
| "loss": 0.7305, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_loss": 0.6993053555488586, |
| "eval_runtime": 4.5895, |
| "eval_samples_per_second": 418.561, |
| "eval_steps_per_second": 26.364, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.16868335008621216, |
| "learning_rate": 0.0009096774193548387, |
| "loss": 0.807, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_loss": 0.7026967406272888, |
| "eval_runtime": 4.7327, |
| "eval_samples_per_second": 405.898, |
| "eval_steps_per_second": 25.567, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.10501790046691895, |
| "learning_rate": 0.0008967741935483871, |
| "loss": 0.7418, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_loss": 0.6837323904037476, |
| "eval_runtime": 4.5936, |
| "eval_samples_per_second": 418.187, |
| "eval_steps_per_second": 26.341, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.12754693627357483, |
| "learning_rate": 0.0008838709677419356, |
| "loss": 0.7125, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_loss": 0.6846245527267456, |
| "eval_runtime": 4.6234, |
| "eval_samples_per_second": 415.493, |
| "eval_steps_per_second": 26.171, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.1712610423564911, |
| "learning_rate": 0.0008709677419354839, |
| "loss": 0.7813, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 0.687301516532898, |
| "eval_runtime": 4.576, |
| "eval_samples_per_second": 419.802, |
| "eval_steps_per_second": 26.443, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.10070759803056717, |
| "learning_rate": 0.0008580645161290323, |
| "loss": 0.7321, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.71, |
| "eval_loss": 0.6738956570625305, |
| "eval_runtime": 4.5756, |
| "eval_samples_per_second": 419.835, |
| "eval_steps_per_second": 26.445, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.09799516946077347, |
| "learning_rate": 0.0008451612903225807, |
| "loss": 0.6972, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 0.6699069738388062, |
| "eval_runtime": 4.602, |
| "eval_samples_per_second": 417.424, |
| "eval_steps_per_second": 26.293, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.15926051139831543, |
| "learning_rate": 0.0008322580645161291, |
| "loss": 0.7642, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_loss": 0.6707108616828918, |
| "eval_runtime": 4.6005, |
| "eval_samples_per_second": 417.567, |
| "eval_steps_per_second": 26.302, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.1038859635591507, |
| "learning_rate": 0.0008193548387096774, |
| "loss": 0.7234, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_loss": 0.6664640307426453, |
| "eval_runtime": 4.5657, |
| "eval_samples_per_second": 420.746, |
| "eval_steps_per_second": 26.502, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.09894105046987534, |
| "learning_rate": 0.0008064516129032258, |
| "loss": 0.6891, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_loss": 0.6674955487251282, |
| "eval_runtime": 4.6128, |
| "eval_samples_per_second": 416.447, |
| "eval_steps_per_second": 26.231, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 0.108543761074543, |
| "learning_rate": 0.0007935483870967743, |
| "loss": 0.7219, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_loss": 0.6673880815505981, |
| "eval_runtime": 4.6113, |
| "eval_samples_per_second": 416.588, |
| "eval_steps_per_second": 26.24, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 0.08566311001777649, |
| "learning_rate": 0.0007806451612903226, |
| "loss": 0.6558, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_loss": 0.6653081774711609, |
| "eval_runtime": 4.6897, |
| "eval_samples_per_second": 409.621, |
| "eval_steps_per_second": 25.801, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.10951746255159378, |
| "learning_rate": 0.000767741935483871, |
| "loss": 0.5958, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_loss": 0.6638582348823547, |
| "eval_runtime": 4.7789, |
| "eval_samples_per_second": 401.974, |
| "eval_steps_per_second": 25.32, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 0.1710771769285202, |
| "learning_rate": 0.0007548387096774194, |
| "loss": 0.6209, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_loss": 0.6717860102653503, |
| "eval_runtime": 4.6605, |
| "eval_samples_per_second": 412.186, |
| "eval_steps_per_second": 25.963, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 0.09286555647850037, |
| "learning_rate": 0.0007419354838709678, |
| "loss": 0.664, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_loss": 0.6565249562263489, |
| "eval_runtime": 4.6747, |
| "eval_samples_per_second": 410.939, |
| "eval_steps_per_second": 25.884, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.09515868872404099, |
| "learning_rate": 0.0007290322580645162, |
| "loss": 0.6123, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_loss": 0.6616882085800171, |
| "eval_runtime": 4.6796, |
| "eval_samples_per_second": 410.502, |
| "eval_steps_per_second": 25.857, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 0.10440368950366974, |
| "learning_rate": 0.0007161290322580646, |
| "loss": 0.6113, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_loss": 0.6656071543693542, |
| "eval_runtime": 4.6341, |
| "eval_samples_per_second": 414.539, |
| "eval_steps_per_second": 26.111, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.08295060694217682, |
| "learning_rate": 0.000703225806451613, |
| "loss": 0.6742, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_loss": 0.6533424854278564, |
| "eval_runtime": 4.6967, |
| "eval_samples_per_second": 409.008, |
| "eval_steps_per_second": 25.763, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.10447151958942413, |
| "learning_rate": 0.0006903225806451613, |
| "loss": 0.6121, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_loss": 0.6559557914733887, |
| "eval_runtime": 4.6736, |
| "eval_samples_per_second": 411.033, |
| "eval_steps_per_second": 25.89, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 0.19215475022792816, |
| "learning_rate": 0.0006774193548387097, |
| "loss": 0.5878, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_loss": 0.6661805510520935, |
| "eval_runtime": 4.656, |
| "eval_samples_per_second": 412.586, |
| "eval_steps_per_second": 25.988, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.08589986711740494, |
| "learning_rate": 0.0006645161290322582, |
| "loss": 0.6835, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_loss": 0.6515570878982544, |
| "eval_runtime": 5.5084, |
| "eval_samples_per_second": 348.738, |
| "eval_steps_per_second": 21.966, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 0.09526026993989944, |
| "learning_rate": 0.0006516129032258064, |
| "loss": 0.6178, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.74, |
| "eval_loss": 0.6476355791091919, |
| "eval_runtime": 4.6704, |
| "eval_samples_per_second": 411.312, |
| "eval_steps_per_second": 25.908, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.12676902115345, |
| "learning_rate": 0.0006387096774193548, |
| "loss": 0.5738, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_loss": 0.648709774017334, |
| "eval_runtime": 4.6719, |
| "eval_samples_per_second": 411.185, |
| "eval_steps_per_second": 25.9, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 0.08004370331764221, |
| "learning_rate": 0.0006258064516129032, |
| "loss": 0.6869, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_loss": 0.6497883200645447, |
| "eval_runtime": 4.6811, |
| "eval_samples_per_second": 410.376, |
| "eval_steps_per_second": 25.849, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 0.07917796820402145, |
| "learning_rate": 0.0006129032258064516, |
| "loss": 0.6231, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.93, |
| "eval_loss": 0.6452683806419373, |
| "eval_runtime": 4.7528, |
| "eval_samples_per_second": 404.179, |
| "eval_steps_per_second": 25.458, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 0.11247972398996353, |
| "learning_rate": 0.0006, |
| "loss": 0.5753, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.99, |
| "eval_loss": 0.6489792466163635, |
| "eval_runtime": 4.6875, |
| "eval_samples_per_second": 409.813, |
| "eval_steps_per_second": 25.813, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.06, |
| "grad_norm": 0.08393968641757965, |
| "learning_rate": 0.0005870967741935483, |
| "loss": 0.6299, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.06, |
| "eval_loss": 0.6549689173698425, |
| "eval_runtime": 4.6812, |
| "eval_samples_per_second": 410.367, |
| "eval_steps_per_second": 25.848, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 0.09901689738035202, |
| "learning_rate": 0.0005741935483870968, |
| "loss": 0.553, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.12, |
| "eval_loss": 0.6531901955604553, |
| "eval_runtime": 4.6345, |
| "eval_samples_per_second": 414.5, |
| "eval_steps_per_second": 26.109, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.19, |
| "grad_norm": 0.14240662753582, |
| "learning_rate": 0.0005612903225806451, |
| "loss": 0.4872, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.19, |
| "eval_loss": 0.6619027256965637, |
| "eval_runtime": 4.6673, |
| "eval_samples_per_second": 411.588, |
| "eval_steps_per_second": 25.925, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.12037284672260284, |
| "learning_rate": 0.0005483870967741935, |
| "loss": 0.5957, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.25, |
| "eval_loss": 0.6506627202033997, |
| "eval_runtime": 4.6368, |
| "eval_samples_per_second": 414.293, |
| "eval_steps_per_second": 26.096, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.31, |
| "grad_norm": 0.08450206369161606, |
| "learning_rate": 0.000535483870967742, |
| "loss": 0.5647, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.31, |
| "eval_loss": 0.6499984264373779, |
| "eval_runtime": 4.64, |
| "eval_samples_per_second": 414.005, |
| "eval_steps_per_second": 26.077, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.38, |
| "grad_norm": 0.09682720899581909, |
| "learning_rate": 0.0005225806451612903, |
| "loss": 0.5017, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_loss": 0.6509167551994324, |
| "eval_runtime": 4.7996, |
| "eval_samples_per_second": 400.243, |
| "eval_steps_per_second": 25.21, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 0.1342058777809143, |
| "learning_rate": 0.0005096774193548387, |
| "loss": 0.5774, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.44, |
| "eval_loss": 0.6469881534576416, |
| "eval_runtime": 4.6664, |
| "eval_samples_per_second": 411.664, |
| "eval_steps_per_second": 25.93, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.51, |
| "grad_norm": 0.09072865545749664, |
| "learning_rate": 0.0004967741935483871, |
| "loss": 0.5773, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.51, |
| "eval_loss": 0.6494817733764648, |
| "eval_runtime": 4.6407, |
| "eval_samples_per_second": 413.945, |
| "eval_steps_per_second": 26.074, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.57, |
| "grad_norm": 0.10313040763139725, |
| "learning_rate": 0.0004838709677419355, |
| "loss": 0.5158, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.57, |
| "eval_loss": 0.6480950117111206, |
| "eval_runtime": 4.6554, |
| "eval_samples_per_second": 412.638, |
| "eval_steps_per_second": 25.991, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.12878872454166412, |
| "learning_rate": 0.00047096774193548384, |
| "loss": 0.5638, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.64, |
| "eval_loss": 0.645260214805603, |
| "eval_runtime": 4.654, |
| "eval_samples_per_second": 412.765, |
| "eval_steps_per_second": 25.999, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 0.1101013645529747, |
| "learning_rate": 0.00045806451612903225, |
| "loss": 0.5839, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.7, |
| "eval_loss": 0.6407684683799744, |
| "eval_runtime": 4.6561, |
| "eval_samples_per_second": 412.576, |
| "eval_steps_per_second": 25.987, |
| "step": 672 |
| } |
| ], |
| "logging_steps": 16, |
| "max_steps": 1240, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 16, |
| "total_flos": 1.668443666998395e+17, |
| "train_batch_size": 96, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|