{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.3722635608756604,
  "eval_steps": 500,
  "global_step": 11000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010783996549121105,
      "grad_norm": 0.2076384574174881,
      "learning_rate": 5.861244019138756e-06,
      "loss": 1.0803,
      "step": 50
    },
    {
      "epoch": 0.02156799309824221,
      "grad_norm": 0.25434058904647827,
      "learning_rate": 1.1842105263157895e-05,
      "loss": 1.0521,
      "step": 100
    },
    {
      "epoch": 0.03235198964736331,
      "grad_norm": 0.24684220552444458,
      "learning_rate": 1.7822966507177032e-05,
      "loss": 1.0288,
      "step": 150
    },
    {
      "epoch": 0.04313598619648442,
      "grad_norm": 0.3034498691558838,
      "learning_rate": 2.380382775119617e-05,
      "loss": 0.9972,
      "step": 200
    },
    {
      "epoch": 0.05391998274560552,
      "grad_norm": 0.2725016176700592,
      "learning_rate": 2.9784688995215314e-05,
      "loss": 0.9555,
      "step": 250
    },
    {
      "epoch": 0.06470397929472663,
      "grad_norm": 0.27356916666030884,
      "learning_rate": 3.576555023923445e-05,
      "loss": 0.9688,
      "step": 300
    },
    {
      "epoch": 0.07548797584384773,
      "grad_norm": 0.2624454200267792,
      "learning_rate": 4.174641148325359e-05,
      "loss": 0.9699,
      "step": 350
    },
    {
      "epoch": 0.08627197239296884,
      "grad_norm": 0.2676330506801605,
      "learning_rate": 4.772727272727273e-05,
      "loss": 0.9739,
      "step": 400
    },
    {
      "epoch": 0.09705596894208994,
      "grad_norm": 0.24369767308235168,
      "learning_rate": 4.999934880025785e-05,
      "loss": 0.9833,
      "step": 450
    },
    {
      "epoch": 0.10783996549121104,
      "grad_norm": 0.26960158348083496,
      "learning_rate": 4.9995554200393156e-05,
      "loss": 0.9677,
      "step": 500
    },
    {
      "epoch": 0.11862396204033215,
      "grad_norm": 0.2564559578895569,
      "learning_rate": 4.998837209058379e-05,
      "loss": 0.9493,
      "step": 550
    },
    {
      "epoch": 0.12940795858945325,
      "grad_norm": 0.23627087473869324,
      "learning_rate": 4.9977803444181587e-05,
      "loss": 0.9726,
      "step": 600
    },
    {
      "epoch": 0.14019195513857435,
      "grad_norm": 0.22857290506362915,
      "learning_rate": 4.996384969349704e-05,
      "loss": 0.9653,
      "step": 650
    },
    {
      "epoch": 0.15097595168769545,
      "grad_norm": 0.25175178050994873,
      "learning_rate": 4.9946512729605226e-05,
      "loss": 0.9725,
      "step": 700
    },
    {
      "epoch": 0.16175994823681655,
      "grad_norm": 0.20284195244312286,
      "learning_rate": 4.992579490208947e-05,
      "loss": 0.968,
      "step": 750
    },
    {
      "epoch": 0.17254394478593768,
      "grad_norm": 0.228809654712677,
      "learning_rate": 4.990169901872295e-05,
      "loss": 0.9338,
      "step": 800
    },
    {
      "epoch": 0.18332794133505878,
      "grad_norm": 0.2436237633228302,
      "learning_rate": 4.987422834508818e-05,
      "loss": 0.9581,
      "step": 850
    },
    {
      "epoch": 0.19411193788417988,
      "grad_norm": 0.2001142054796219,
      "learning_rate": 4.9843386604134425e-05,
      "loss": 0.9512,
      "step": 900
    },
    {
      "epoch": 0.20489593443330098,
      "grad_norm": 0.20406965911388397,
      "learning_rate": 4.980917797567315e-05,
      "loss": 0.9479,
      "step": 950
    },
    {
      "epoch": 0.21567993098242208,
      "grad_norm": 0.20756883919239044,
      "learning_rate": 4.9771607095811565e-05,
      "loss": 0.9552,
      "step": 1000
    },
    {
      "epoch": 0.22646392753154318,
      "grad_norm": 0.23893098533153534,
      "learning_rate": 4.9730679056324334e-05,
      "loss": 0.9732,
      "step": 1050
    },
    {
      "epoch": 0.2372479240806643,
      "grad_norm": 0.20374947786331177,
      "learning_rate": 4.968639940396346e-05,
      "loss": 0.961,
      "step": 1100
    },
    {
      "epoch": 0.2480319206297854,
      "grad_norm": 0.20845109224319458,
      "learning_rate": 4.963877413970663e-05,
      "loss": 0.9481,
      "step": 1150
    },
    {
      "epoch": 0.2588159171789065,
      "grad_norm": 0.23683245480060577,
      "learning_rate": 4.958780971794388e-05,
      "loss": 0.9558,
      "step": 1200
    },
    {
      "epoch": 0.2695999137280276,
      "grad_norm": 0.18015944957733154,
      "learning_rate": 4.953351304560292e-05,
      "loss": 0.9367,
      "step": 1250
    },
    {
      "epoch": 0.2803839102771487,
      "grad_norm": 0.21432434022426605,
      "learning_rate": 4.947589148121301e-05,
      "loss": 0.9289,
      "step": 1300
    },
    {
      "epoch": 0.2911679068262698,
      "grad_norm": 0.217897430062294,
      "learning_rate": 4.941495283390778e-05,
      "loss": 0.9663,
      "step": 1350
    },
    {
      "epoch": 0.3019519033753909,
      "grad_norm": 0.23911495506763458,
      "learning_rate": 4.9350705362366836e-05,
      "loss": 0.9534,
      "step": 1400
    },
    {
      "epoch": 0.312735899924512,
      "grad_norm": 0.21729810535907745,
      "learning_rate": 4.928315777369652e-05,
      "loss": 0.9663,
      "step": 1450
    },
    {
      "epoch": 0.3235198964736331,
      "grad_norm": 0.19448955357074738,
      "learning_rate": 4.9212319222249914e-05,
      "loss": 0.9203,
      "step": 1500
    },
    {
      "epoch": 0.3343038930227542,
      "grad_norm": 0.20799997448921204,
      "learning_rate": 4.913819930838616e-05,
      "loss": 0.9426,
      "step": 1550
    },
    {
      "epoch": 0.34508788957187536,
      "grad_norm": 0.1989525556564331,
      "learning_rate": 4.906080807716941e-05,
      "loss": 0.9544,
      "step": 1600
    },
    {
      "epoch": 0.35587188612099646,
      "grad_norm": 0.21680687367916107,
      "learning_rate": 4.898015601700745e-05,
      "loss": 0.9666,
      "step": 1650
    },
    {
      "epoch": 0.36665588267011756,
      "grad_norm": 0.2180759161710739,
      "learning_rate": 4.889625405823027e-05,
      "loss": 0.9441,
      "step": 1700
    },
    {
      "epoch": 0.37743987921923866,
      "grad_norm": 0.19334350526332855,
      "learning_rate": 4.880911357160877e-05,
      "loss": 0.9415,
      "step": 1750
    },
    {
      "epoch": 0.38822387576835976,
      "grad_norm": 0.19350044429302216,
      "learning_rate": 4.871874636681366e-05,
      "loss": 0.9534,
      "step": 1800
    },
    {
      "epoch": 0.39900787231748086,
      "grad_norm": 0.23279784619808197,
      "learning_rate": 4.862516469081505e-05,
      "loss": 0.9578,
      "step": 1850
    },
    {
      "epoch": 0.40979186886660196,
      "grad_norm": 0.2038542479276657,
      "learning_rate": 4.852838122622264e-05,
      "loss": 0.9416,
      "step": 1900
    },
    {
      "epoch": 0.42057586541572306,
      "grad_norm": 0.21980704367160797,
      "learning_rate": 4.842840908956692e-05,
      "loss": 0.9359,
      "step": 1950
    },
    {
      "epoch": 0.43135986196484416,
      "grad_norm": 0.20842380821704865,
      "learning_rate": 4.832526182952156e-05,
      "loss": 0.9495,
      "step": 2000
    },
    {
      "epoch": 0.44214385851396526,
      "grad_norm": 0.2161971479654312,
      "learning_rate": 4.821895342506724e-05,
      "loss": 0.9388,
      "step": 2050
    },
    {
      "epoch": 0.45292785506308636,
      "grad_norm": 0.2119661122560501,
      "learning_rate": 4.8109498283597146e-05,
      "loss": 0.9618,
      "step": 2100
    },
    {
      "epoch": 0.46371185161220746,
      "grad_norm": 0.17877915501594543,
      "learning_rate": 4.799691123896441e-05,
      "loss": 0.9498,
      "step": 2150
    },
    {
      "epoch": 0.4744958481613286,
      "grad_norm": 0.2198779135942459,
      "learning_rate": 4.788120754947179e-05,
      "loss": 0.9464,
      "step": 2200
    },
    {
      "epoch": 0.4852798447104497,
      "grad_norm": 0.20385344326496124,
      "learning_rate": 4.7762402895803763e-05,
      "loss": 0.9423,
      "step": 2250
    },
    {
      "epoch": 0.4960638412595708,
      "grad_norm": 0.21472816169261932,
      "learning_rate": 4.764051337890143e-05,
      "loss": 0.9295,
      "step": 2300
    },
    {
      "epoch": 0.5068478378086919,
      "grad_norm": 0.21423693001270294,
      "learning_rate": 4.7515555517780405e-05,
      "loss": 0.9557,
      "step": 2350
    },
    {
      "epoch": 0.517631834357813,
      "grad_norm": 0.2088768184185028,
      "learning_rate": 4.7387546247292156e-05,
      "loss": 0.9392,
      "step": 2400
    },
    {
      "epoch": 0.5284158309069341,
      "grad_norm": 0.18323567509651184,
      "learning_rate": 4.725650291582885e-05,
      "loss": 0.9418,
      "step": 2450
    },
    {
      "epoch": 0.5391998274560552,
      "grad_norm": 0.22341737151145935,
      "learning_rate": 4.712244328297224e-05,
      "loss": 0.9207,
      "step": 2500
    },
    {
      "epoch": 0.5499838240051763,
      "grad_norm": 0.2024504542350769,
      "learning_rate": 4.698538551708682e-05,
      "loss": 0.9337,
      "step": 2550
    },
    {
      "epoch": 0.5607678205542974,
      "grad_norm": 0.20455148816108704,
      "learning_rate": 4.684534819285758e-05,
      "loss": 0.9451,
      "step": 2600
    },
    {
      "epoch": 0.5715518171034185,
      "grad_norm": 0.19093358516693115,
      "learning_rate": 4.6702350288772626e-05,
      "loss": 0.9468,
      "step": 2650
    },
    {
      "epoch": 0.5823358136525396,
      "grad_norm": 0.1995963305234909,
      "learning_rate": 4.6556411184551176e-05,
      "loss": 0.9373,
      "step": 2700
    },
    {
      "epoch": 0.5931198102016607,
      "grad_norm": 0.19664354622364044,
      "learning_rate": 4.640755065851712e-05,
      "loss": 0.9609,
      "step": 2750
    },
    {
      "epoch": 0.6039038067507818,
      "grad_norm": 0.20155999064445496,
      "learning_rate": 4.6255788884918595e-05,
      "loss": 0.9221,
      "step": 2800
    },
    {
      "epoch": 0.6146878032999029,
      "grad_norm": 0.2094108611345291,
      "learning_rate": 4.610114643119382e-05,
      "loss": 0.9665,
      "step": 2850
    },
    {
      "epoch": 0.625471799849024,
      "grad_norm": 0.23038670420646667,
      "learning_rate": 4.5943644255183785e-05,
      "loss": 0.9223,
      "step": 2900
    },
    {
      "epoch": 0.6362557963981451,
      "grad_norm": 0.22103433310985565,
      "learning_rate": 4.5783303702291856e-05,
      "loss": 0.9271,
      "step": 2950
    },
    {
      "epoch": 0.6470397929472662,
      "grad_norm": 0.21444232761859894,
      "learning_rate": 4.5620146502591065e-05,
      "loss": 0.9553,
      "step": 3000
    },
    {
      "epoch": 0.6578237894963873,
      "grad_norm": 0.20402322709560394,
      "learning_rate": 4.5454194767879046e-05,
      "loss": 0.9342,
      "step": 3050
    },
    {
      "epoch": 0.6686077860455084,
      "grad_norm": 0.17598140239715576,
      "learning_rate": 4.52854709886814e-05,
      "loss": 0.9343,
      "step": 3100
    },
    {
      "epoch": 0.6793917825946296,
      "grad_norm": 0.2235531210899353,
      "learning_rate": 4.511399803120367e-05,
      "loss": 0.9325,
      "step": 3150
    },
    {
      "epoch": 0.6901757791437507,
      "grad_norm": 0.1978316605091095,
      "learning_rate": 4.49397991342324e-05,
      "loss": 0.9175,
      "step": 3200
    },
    {
      "epoch": 0.7009597756928718,
      "grad_norm": 0.20724375545978546,
      "learning_rate": 4.476289790598571e-05,
      "loss": 0.9509,
      "step": 3250
    },
    {
      "epoch": 0.7117437722419929,
      "grad_norm": 0.19276615977287292,
      "learning_rate": 4.458331832091385e-05,
      "loss": 0.9247,
      "step": 3300
    },
    {
      "epoch": 0.722527768791114,
      "grad_norm": 0.2208387851715088,
      "learning_rate": 4.440108471644997e-05,
      "loss": 0.9409,
      "step": 3350
    },
    {
      "epoch": 0.7333117653402351,
      "grad_norm": 0.21308571100234985,
      "learning_rate": 4.421622178971193e-05,
      "loss": 0.9267,
      "step": 3400
    },
    {
      "epoch": 0.7440957618893562,
      "grad_norm": 0.2115100473165512,
      "learning_rate": 4.4028754594155125e-05,
      "loss": 0.933,
      "step": 3450
    },
    {
      "epoch": 0.7548797584384773,
      "grad_norm": 0.21246980130672455,
      "learning_rate": 4.383870853617721e-05,
      "loss": 0.9422,
      "step": 3500
    },
    {
      "epoch": 0.7656637549875984,
      "grad_norm": 0.2082446962594986,
      "learning_rate": 4.364610937167485e-05,
      "loss": 0.9204,
      "step": 3550
    },
    {
      "epoch": 0.7764477515367195,
      "grad_norm": 0.22102369368076324,
      "learning_rate": 4.345098320255321e-05,
      "loss": 0.9226,
      "step": 3600
    },
    {
      "epoch": 0.7872317480858406,
      "grad_norm": 0.19831791520118713,
      "learning_rate": 4.325335647318848e-05,
      "loss": 0.9327,
      "step": 3650
    },
    {
      "epoch": 0.7980157446349617,
      "grad_norm": 0.2220238745212555,
      "learning_rate": 4.3053255966844016e-05,
      "loss": 0.9318,
      "step": 3700
    },
    {
      "epoch": 0.8087997411840828,
      "grad_norm": 0.20910035073757172,
      "learning_rate": 4.285070880204057e-05,
      "loss": 0.9306,
      "step": 3750
    },
    {
      "epoch": 0.8195837377332039,
      "grad_norm": 0.21745839715003967,
      "learning_rate": 4.264574242888105e-05,
      "loss": 0.9304,
      "step": 3800
    },
    {
      "epoch": 0.830367734282325,
      "grad_norm": 0.24437028169631958,
      "learning_rate": 4.2438384625330374e-05,
      "loss": 0.9433,
      "step": 3850
    },
    {
      "epoch": 0.8411517308314461,
      "grad_norm": 0.2319614738225937,
      "learning_rate": 4.222866349345083e-05,
      "loss": 0.9536,
      "step": 3900
    },
    {
      "epoch": 0.8519357273805672,
      "grad_norm": 0.2375030517578125,
      "learning_rate": 4.2016607455593624e-05,
      "loss": 0.9421,
      "step": 3950
    },
    {
      "epoch": 0.8627197239296883,
      "grad_norm": 0.2176317423582077,
      "learning_rate": 4.1802245250546926e-05,
      "loss": 0.9268,
      "step": 4000
    },
    {
      "epoch": 0.8735037204788094,
      "grad_norm": 0.2226661890745163,
      "learning_rate": 4.158560592964104e-05,
      "loss": 0.925,
      "step": 4050
    },
    {
      "epoch": 0.8842877170279305,
      "grad_norm": 0.2202196568250656,
      "learning_rate": 4.136671885281124e-05,
      "loss": 0.9465,
      "step": 4100
    },
    {
      "epoch": 0.8950717135770516,
      "grad_norm": 0.20654049515724182,
      "learning_rate": 4.114561368461884e-05,
      "loss": 0.9251,
      "step": 4150
    },
    {
      "epoch": 0.9058557101261727,
      "grad_norm": 0.23357035219669342,
      "learning_rate": 4.092232039023084e-05,
      "loss": 0.9417,
      "step": 4200
    },
    {
      "epoch": 0.9166397066752938,
      "grad_norm": 0.20816297829151154,
      "learning_rate": 4.069686923135896e-05,
      "loss": 0.9225,
      "step": 4250
    },
    {
      "epoch": 0.9274237032244149,
      "grad_norm": 0.20184196531772614,
      "learning_rate": 4.04692907621584e-05,
      "loss": 0.9212,
      "step": 4300
    },
    {
      "epoch": 0.938207699773536,
      "grad_norm": 0.1984609067440033,
      "learning_rate": 4.023961582508704e-05,
      "loss": 0.9261,
      "step": 4350
    },
    {
      "epoch": 0.9489916963226572,
      "grad_norm": 0.22444488108158112,
      "learning_rate": 4.000787554672553e-05,
      "loss": 0.9291,
      "step": 4400
    },
    {
      "epoch": 0.9597756928717783,
      "grad_norm": 0.21115441620349884,
      "learning_rate": 3.977410133355884e-05,
      "loss": 0.9349,
      "step": 4450
    },
    {
      "epoch": 0.9705596894208994,
      "grad_norm": 0.19569146633148193,
      "learning_rate": 3.953832486771996e-05,
      "loss": 0.9049,
      "step": 4500
    },
    {
      "epoch": 0.9813436859700205,
      "grad_norm": 0.22996151447296143,
      "learning_rate": 3.930057810269612e-05,
      "loss": 0.894,
      "step": 4550
    },
    {
      "epoch": 0.9921276825191416,
      "grad_norm": 0.19879557192325592,
      "learning_rate": 3.906089325899841e-05,
      "loss": 0.955,
      "step": 4600
    },
    {
      "epoch": 1.0028038391027714,
      "grad_norm": 0.207550510764122,
      "learning_rate": 3.8819302819795046e-05,
      "loss": 0.9362,
      "step": 4650
    },
    {
      "epoch": 1.0135878356518926,
      "grad_norm": 0.20435990393161774,
      "learning_rate": 3.8575839526509105e-05,
      "loss": 0.9217,
      "step": 4700
    },
    {
      "epoch": 1.0243718322010138,
      "grad_norm": 0.22362500429153442,
      "learning_rate": 3.833053637438128e-05,
      "loss": 0.9342,
      "step": 4750
    },
    {
      "epoch": 1.0351558287501348,
      "grad_norm": 0.18318387866020203,
      "learning_rate": 3.8083426607998216e-05,
      "loss": 0.8937,
      "step": 4800
    },
    {
      "epoch": 1.045939825299256,
      "grad_norm": 0.20834890007972717,
      "learning_rate": 3.783454371678705e-05,
      "loss": 0.9103,
      "step": 4850
    },
    {
      "epoch": 1.056723821848377,
      "grad_norm": 0.2138434648513794,
      "learning_rate": 3.758392143047677e-05,
      "loss": 0.9003,
      "step": 4900
    },
    {
      "epoch": 1.0675078183974982,
      "grad_norm": 0.21266281604766846,
      "learning_rate": 3.733159371452701e-05,
      "loss": 0.9142,
      "step": 4950
    },
    {
      "epoch": 1.0782918149466192,
      "grad_norm": 0.25879135727882385,
      "learning_rate": 3.707759476552489e-05,
      "loss": 0.8976,
      "step": 5000
    },
    {
      "epoch": 1.0890758114957404,
      "grad_norm": 0.2042112946510315,
      "learning_rate": 3.682195900655057e-05,
      "loss": 0.9092,
      "step": 5050
    },
    {
      "epoch": 1.0998598080448614,
      "grad_norm": 0.25018027424812317,
      "learning_rate": 3.656472108251205e-05,
      "loss": 0.8843,
      "step": 5100
    },
    {
      "epoch": 1.1106438045939826,
      "grad_norm": 0.2371663898229599,
      "learning_rate": 3.630591585544995e-05,
      "loss": 0.8764,
      "step": 5150
    },
    {
      "epoch": 1.1214278011431036,
      "grad_norm": 0.23503442108631134,
      "learning_rate": 3.604557839981284e-05,
      "loss": 0.9091,
      "step": 5200
    },
    {
      "epoch": 1.1322117976922248,
      "grad_norm": 0.24042187631130219,
      "learning_rate": 3.5783743997703824e-05,
      "loss": 0.9206,
      "step": 5250
    },
    {
      "epoch": 1.1429957942413458,
      "grad_norm": 0.25456419587135315,
      "learning_rate": 3.5520448134098886e-05,
      "loss": 0.8784,
      "step": 5300
    },
    {
      "epoch": 1.153779790790467,
      "grad_norm": 0.23184941709041595,
      "learning_rate": 3.5255726492037854e-05,
      "loss": 0.8798,
      "step": 5350
    },
    {
      "epoch": 1.164563787339588,
      "grad_norm": 0.24035029113292694,
      "learning_rate": 3.498961494778851e-05,
      "loss": 0.9039,
      "step": 5400
    },
    {
      "epoch": 1.1753477838887092,
      "grad_norm": 0.24733129143714905,
      "learning_rate": 3.4722149565984385e-05,
      "loss": 0.9094,
      "step": 5450
    },
    {
      "epoch": 1.1861317804378302,
      "grad_norm": 0.25908830761909485,
      "learning_rate": 3.445336659473718e-05,
      "loss": 0.9167,
      "step": 5500
    },
    {
      "epoch": 1.1969157769869514,
      "grad_norm": 0.24497312307357788,
      "learning_rate": 3.4183302460724246e-05,
      "loss": 0.8919,
      "step": 5550
    },
    {
      "epoch": 1.2076997735360724,
      "grad_norm": 0.24705035984516144,
      "learning_rate": 3.391199376425188e-05,
      "loss": 0.9018,
      "step": 5600
    },
    {
      "epoch": 1.2184837700851936,
      "grad_norm": 0.2370757907629013,
      "learning_rate": 3.363947727429507e-05,
      "loss": 0.8925,
      "step": 5650
    },
    {
      "epoch": 1.2292677666343146,
      "grad_norm": 0.24430540204048157,
      "learning_rate": 3.336578992351442e-05,
      "loss": 0.8834,
      "step": 5700
    },
    {
      "epoch": 1.2400517631834358,
      "grad_norm": 0.20415450632572174,
      "learning_rate": 3.3090968803250856e-05,
      "loss": 0.9195,
      "step": 5750
    },
    {
      "epoch": 1.2508357597325568,
      "grad_norm": 0.24224655330181122,
      "learning_rate": 3.281505115849885e-05,
      "loss": 0.8963,
      "step": 5800
    },
    {
      "epoch": 1.261619756281678,
      "grad_norm": 0.263614684343338,
      "learning_rate": 3.253807438285879e-05,
      "loss": 0.9081,
      "step": 5850
    },
    {
      "epoch": 1.2724037528307992,
      "grad_norm": 0.22934329509735107,
      "learning_rate": 3.226007601346927e-05,
      "loss": 0.8957,
      "step": 5900
    },
    {
      "epoch": 1.2831877493799202,
      "grad_norm": 0.2595406770706177,
      "learning_rate": 3.198109372591984e-05,
      "loss": 0.8798,
      "step": 5950
    },
    {
      "epoch": 1.2939717459290412,
      "grad_norm": 0.2610589861869812,
      "learning_rate": 3.170677292377989e-05,
      "loss": 0.9074,
      "step": 6000
    },
    {
      "epoch": 1.3047557424781624,
      "grad_norm": 0.27022746205329895,
      "learning_rate": 3.142595414578805e-05,
      "loss": 0.9059,
      "step": 6050
    },
    {
      "epoch": 1.3155397390272836,
      "grad_norm": 0.21983672678470612,
      "learning_rate": 3.114426449358401e-05,
      "loss": 0.9179,
      "step": 6100
    },
    {
      "epoch": 1.3263237355764046,
      "grad_norm": 0.22227706015110016,
      "learning_rate": 3.086174214301658e-05,
      "loss": 0.8916,
      "step": 6150
    },
    {
      "epoch": 1.3371077321255256,
      "grad_norm": 0.2406383454799652,
      "learning_rate": 3.05784253827856e-05,
      "loss": 0.8994,
      "step": 6200
    },
    {
      "epoch": 1.3478917286746468,
      "grad_norm": 0.23662422597408295,
      "learning_rate": 3.029435260925288e-05,
      "loss": 0.893,
      "step": 6250
    },
    {
      "epoch": 1.358675725223768,
      "grad_norm": 0.26936379075050354,
      "learning_rate": 3.000956232123856e-05,
      "loss": 0.9033,
      "step": 6300
    },
    {
      "epoch": 1.369459721772889,
      "grad_norm": 0.253090500831604,
      "learning_rate": 2.972409311480357e-05,
      "loss": 0.8867,
      "step": 6350
    },
    {
      "epoch": 1.3802437183220102,
      "grad_norm": 0.2847846746444702,
      "learning_rate": 2.94379836780189e-05,
      "loss": 0.8721,
      "step": 6400
    },
    {
      "epoch": 1.3910277148711312,
      "grad_norm": 0.26056525111198425,
      "learning_rate": 2.9151272785722466e-05,
      "loss": 0.8913,
      "step": 6450
    },
    {
      "epoch": 1.4018117114202524,
      "grad_norm": 0.23132337629795074,
      "learning_rate": 2.8863999294264122e-05,
      "loss": 0.9058,
      "step": 6500
    },
    {
      "epoch": 1.4125957079693734,
      "grad_norm": 0.2190658152103424,
      "learning_rate": 2.8576202136239688e-05,
      "loss": 0.8906,
      "step": 6550
    },
    {
      "epoch": 1.4233797045184946,
      "grad_norm": 0.26291966438293457,
      "learning_rate": 2.8287920315214643e-05,
      "loss": 0.9229,
      "step": 6600
    },
    {
      "epoch": 1.4341637010676156,
      "grad_norm": 0.23218290507793427,
      "learning_rate": 2.799919290043818e-05,
      "loss": 0.9242,
      "step": 6650
    },
    {
      "epoch": 1.4449476976167368,
      "grad_norm": 0.2565305233001709,
      "learning_rate": 2.7710059021548344e-05,
      "loss": 0.883,
      "step": 6700
    },
    {
      "epoch": 1.4557316941658578,
      "grad_norm": 0.2470102459192276,
      "learning_rate": 2.7420557863269043e-05,
      "loss": 0.8949,
      "step": 6750
    },
    {
      "epoch": 1.466515690714979,
      "grad_norm": 0.25169292092323303,
      "learning_rate": 2.713072866009953e-05,
      "loss": 0.9122,
      "step": 6800
    },
    {
      "epoch": 1.4772996872641002,
      "grad_norm": 0.23668742179870605,
      "learning_rate": 2.6840610690997182e-05,
      "loss": 0.8919,
      "step": 6850
    },
    {
      "epoch": 1.4880836838132212,
      "grad_norm": 0.2786126732826233,
      "learning_rate": 2.655024327405422e-05,
      "loss": 0.8883,
      "step": 6900
    },
    {
      "epoch": 1.4988676803623422,
      "grad_norm": 0.25976258516311646,
      "learning_rate": 2.6259665761169183e-05,
      "loss": 0.9291,
      "step": 6950
    },
    {
      "epoch": 1.5096516769114634,
      "grad_norm": 0.2566768229007721,
      "learning_rate": 2.5968917532713743e-05,
      "loss": 0.901,
      "step": 7000
    },
    {
      "epoch": 1.5204356734605846,
      "grad_norm": 0.24728557467460632,
      "learning_rate": 2.5678037992195714e-05,
      "loss": 0.8811,
      "step": 7050
    },
    {
      "epoch": 1.5312196700097056,
      "grad_norm": 0.24409767985343933,
      "learning_rate": 2.5387066560918906e-05,
      "loss": 0.904,
      "step": 7100
    },
    {
      "epoch": 1.5420036665588266,
      "grad_norm": 0.2483212798833847,
      "learning_rate": 2.5096042672640596e-05,
      "loss": 0.8945,
      "step": 7150
    },
    {
      "epoch": 1.5527876631079478,
      "grad_norm": 0.23452620208263397,
      "learning_rate": 2.4805005768227252e-05,
      "loss": 0.9063,
      "step": 7200
    },
    {
      "epoch": 1.563571659657069,
      "grad_norm": 0.22194162011146545,
      "learning_rate": 2.4513995290309358e-05,
      "loss": 0.8834,
      "step": 7250
    },
    {
      "epoch": 1.57435565620619,
      "grad_norm": 0.25706538558006287,
      "learning_rate": 2.4223050677935947e-05,
      "loss": 0.9149,
      "step": 7300
    },
    {
      "epoch": 1.585139652755311,
      "grad_norm": 0.2703045606613159,
      "learning_rate": 2.3932211361229683e-05,
      "loss": 0.9059,
      "step": 7350
    },
    {
      "epoch": 1.5959236493044322,
      "grad_norm": 0.26212379336357117,
      "learning_rate": 2.3641516756043053e-05,
      "loss": 0.8996,
      "step": 7400
    },
    {
      "epoch": 1.6067076458535534,
      "grad_norm": 0.241121307015419,
      "learning_rate": 2.3351006258616618e-05,
      "loss": 0.8934,
      "step": 7450
    },
    {
      "epoch": 1.6174916424026744,
      "grad_norm": 0.2937757968902588,
      "learning_rate": 2.3060719240239807e-05,
      "loss": 0.8907,
      "step": 7500
    },
    {
      "epoch": 1.6282756389517954,
      "grad_norm": 0.2826499938964844,
      "learning_rate": 2.2770695041915187e-05,
      "loss": 0.8963,
      "step": 7550
    },
    {
      "epoch": 1.6390596355009166,
      "grad_norm": 0.2622433602809906,
      "learning_rate": 2.248097296902672e-05,
      "loss": 0.8797,
      "step": 7600
    },
    {
      "epoch": 1.6498436320500378,
      "grad_norm": 0.26400211453437805,
      "learning_rate": 2.2191592286013042e-05,
      "loss": 0.9084,
      "step": 7650
    },
    {
      "epoch": 1.6606276285991588,
      "grad_norm": 0.25721365213394165,
      "learning_rate": 2.1902592211046032e-05,
      "loss": 0.882,
      "step": 7700
    },
    {
      "epoch": 1.6714116251482798,
      "grad_norm": 0.25235188007354736,
      "learning_rate": 2.1614011910715896e-05,
      "loss": 0.9306,
      "step": 7750
    },
    {
      "epoch": 1.6821956216974012,
      "grad_norm": 0.2521611154079437,
      "learning_rate": 2.1325890494723065e-05,
      "loss": 0.8911,
      "step": 7800
    },
    {
      "epoch": 1.6929796182465222,
      "grad_norm": 0.2881399691104889,
      "learning_rate": 2.103826701057793e-05,
      "loss": 0.8837,
      "step": 7850
    },
    {
      "epoch": 1.7037636147956432,
      "grad_norm": 0.2743209898471832,
      "learning_rate": 2.075118043830888e-05,
      "loss": 0.9072,
      "step": 7900
    },
    {
      "epoch": 1.7145476113447644,
      "grad_norm": 0.2475823312997818,
      "learning_rate": 2.046466968517963e-05,
      "loss": 0.9109,
      "step": 7950
    },
    {
      "epoch": 1.7253316078938856,
      "grad_norm": 0.28786906599998474,
      "learning_rate": 2.0178773580416263e-05,
      "loss": 0.9085,
      "step": 8000
    },
    {
      "epoch": 1.7361156044430066,
      "grad_norm": 0.2793081998825073,
      "learning_rate": 1.9893530869944986e-05,
      "loss": 0.8721,
      "step": 8050
    },
    {
      "epoch": 1.7468996009921276,
      "grad_norm": 0.26357826590538025,
      "learning_rate": 1.9608980211141028e-05,
      "loss": 0.9014,
      "step": 8100
    },
    {
      "epoch": 1.7576835975412488,
      "grad_norm": 0.26504483819007874,
      "learning_rate": 1.93251601675897e-05,
      "loss": 0.9091,
      "step": 8150
    },
    {
      "epoch": 1.76846759409037,
      "grad_norm": 0.26386550068855286,
      "learning_rate": 1.9042109203860027e-05,
      "loss": 0.8985,
      "step": 8200
    },
    {
      "epoch": 1.779251590639491,
      "grad_norm": 0.2590016722679138,
      "learning_rate": 1.87598656802919e-05,
      "loss": 0.8865,
      "step": 8250
    },
    {
      "epoch": 1.790035587188612,
      "grad_norm": 0.2528024911880493,
      "learning_rate": 1.8478467847797238e-05,
      "loss": 0.9046,
      "step": 8300
    },
    {
      "epoch": 1.8008195837377332,
      "grad_norm": 0.27202916145324707,
      "learning_rate": 1.8197953842676168e-05,
      "loss": 0.9021,
      "step": 8350
    },
    {
      "epoch": 1.8116035802868544,
      "grad_norm": 0.240274578332901,
      "learning_rate": 1.7918361681448504e-05,
      "loss": 0.8921,
      "step": 8400
    },
    {
      "epoch": 1.8223875768359754,
      "grad_norm": 0.29021942615509033,
      "learning_rate": 1.7639729255701655e-05,
      "loss": 0.9074,
      "step": 8450
    },
    {
      "epoch": 1.8331715733850964,
      "grad_norm": 0.28871750831604004,
      "learning_rate": 1.7362094326955336e-05,
      "loss": 0.8962,
      "step": 8500
    },
    {
      "epoch": 1.8439555699342176,
      "grad_norm": 0.2800693213939667,
      "learning_rate": 1.7085494521544025e-05,
      "loss": 0.9222,
      "step": 8550
    },
    {
      "epoch": 1.8547395664833388,
      "grad_norm": 0.2543833255767822,
      "learning_rate": 1.6809967325517573e-05,
      "loss": 0.8925,
      "step": 8600
    },
    {
      "epoch": 1.8655235630324598,
      "grad_norm": 0.255051851272583,
      "learning_rate": 1.6535550079561027e-05,
      "loss": 0.8818,
      "step": 8650
    },
    {
      "epoch": 1.8763075595815808,
      "grad_norm": 0.289727121591568,
      "learning_rate": 1.6262279973933984e-05,
      "loss": 0.8878,
      "step": 8700
    },
    {
      "epoch": 1.887091556130702,
      "grad_norm": 0.2506343424320221,
      "learning_rate": 1.5990194043430444e-05,
      "loss": 0.8961,
      "step": 8750
    },
    {
      "epoch": 1.8978755526798232,
      "grad_norm": 0.3042599558830261,
      "learning_rate": 1.5719329162359638e-05,
      "loss": 0.9082,
      "step": 8800
    },
    {
      "epoch": 1.9086595492289442,
      "grad_norm": 0.2791798710823059,
      "learning_rate": 1.5449722039548706e-05,
      "loss": 0.9023,
      "step": 8850
    },
    {
      "epoch": 1.9194435457780652,
      "grad_norm": 0.2678021788597107,
      "learning_rate": 1.5181409213367726e-05,
      "loss": 0.8826,
      "step": 8900
    },
    {
      "epoch": 1.9302275423271864,
      "grad_norm": 0.2640957832336426,
      "learning_rate": 1.4914427046777879e-05,
      "loss": 0.887,
      "step": 8950
    },
    {
      "epoch": 1.9410115388763076,
      "grad_norm": 0.2847963869571686,
      "learning_rate": 1.4648811722403358e-05,
      "loss": 0.8906,
      "step": 9000
    },
    {
      "epoch": 1.9517955354254286,
      "grad_norm": 0.2558712661266327,
      "learning_rate": 1.4384599237627777e-05,
      "loss": 0.9006,
      "step": 9050
    },
    {
      "epoch": 1.9625795319745498,
      "grad_norm": 0.26001158356666565,
      "learning_rate": 1.4121825399715577e-05,
      "loss": 0.902,
      "step": 9100
    },
    {
      "epoch": 1.973363528523671,
      "grad_norm": 0.250234991312027,
      "learning_rate": 1.3860525820959358e-05,
      "loss": 0.8966,
      "step": 9150
    },
    {
      "epoch": 1.984147525072792,
      "grad_norm": 0.2639175355434418,
      "learning_rate": 1.360073591385342e-05,
      "loss": 0.9063,
      "step": 9200
    },
    {
      "epoch": 1.994931521621913,
      "grad_norm": 0.2366214245557785,
      "learning_rate": 1.334249088629464e-05,
      "loss": 0.8907,
      "step": 9250
    },
    {
      "epoch": 2.0056076782055428,
      "grad_norm": 0.291415274143219,
      "learning_rate": 1.3085825736810828e-05,
      "loss": 0.8729,
      "step": 9300
    },
    {
      "epoch": 2.016391674754664,
      "grad_norm": 0.2706186771392822,
      "learning_rate": 1.2830775249817595e-05,
      "loss": 0.8663,
      "step": 9350
    },
    {
      "epoch": 2.027175671303785,
      "grad_norm": 0.2555548846721649,
      "learning_rate": 1.2577373990904279e-05,
      "loss": 0.8663,
      "step": 9400
    },
    {
      "epoch": 2.037959667852906,
      "grad_norm": 0.254191517829895,
      "learning_rate": 1.2325656302149374e-05,
      "loss": 0.8592,
      "step": 9450
    },
    {
      "epoch": 2.0487436644020276,
      "grad_norm": 0.30470383167266846,
      "learning_rate": 1.2075656297466382e-05,
      "loss": 0.8938,
      "step": 9500
    },
    {
      "epoch": 2.0595276609511486,
      "grad_norm": 0.2882542908191681,
      "learning_rate": 1.1827407857980522e-05,
      "loss": 0.8754,
      "step": 9550
    },
    {
      "epoch": 2.0703116575002696,
      "grad_norm": 0.33889445662498474,
      "learning_rate": 1.1580944627437052e-05,
      "loss": 0.8645,
      "step": 9600
    },
    {
      "epoch": 2.0810956540493906,
      "grad_norm": 0.29919326305389404,
      "learning_rate": 1.1336300007641628e-05,
      "loss": 0.8685,
      "step": 9650
    },
    {
      "epoch": 2.091879650598512,
      "grad_norm": 0.2923993468284607,
      "learning_rate": 1.1098344650456325e-05,
      "loss": 0.8577,
      "step": 9700
    },
    {
      "epoch": 2.102663647147633,
      "grad_norm": 0.2865777611732483,
      "learning_rate": 1.0857398452987955e-05,
      "loss": 0.8968,
      "step": 9750
    },
    {
      "epoch": 2.113447643696754,
      "grad_norm": 0.28677886724472046,
      "learning_rate": 1.0618368924500005e-05,
      "loss": 0.8678,
      "step": 9800
    },
    {
      "epoch": 2.124231640245875,
      "grad_norm": 0.2737389802932739,
      "learning_rate": 1.0381288459349405e-05,
      "loss": 0.8865,
      "step": 9850
    },
    {
      "epoch": 2.1350156367949964,
      "grad_norm": 0.27073368430137634,
      "learning_rate": 1.0146189187747276e-05,
      "loss": 0.8733,
      "step": 9900
    },
    {
      "epoch": 2.1457996333441174,
      "grad_norm": 0.280775785446167,
      "learning_rate": 9.913102971404456e-06,
      "loss": 0.8408,
      "step": 9950
    },
    {
      "epoch": 2.1565836298932384,
      "grad_norm": 0.2671400308609009,
      "learning_rate": 9.682061399213525e-06,
      "loss": 0.8792,
      "step": 10000
    },
    {
      "epoch": 2.1673676264423594,
      "grad_norm": 0.3240983188152313,
      "learning_rate": 9.45309578296762e-06,
      "loss": 0.8739,
      "step": 10050
    },
    {
      "epoch": 2.178151622991481,
      "grad_norm": 0.30578577518463135,
      "learning_rate": 9.226237153117056e-06,
      "loss": 0.8731,
      "step": 10100
    },
    {
      "epoch": 2.188935619540602,
      "grad_norm": 0.2961669862270355,
      "learning_rate": 9.001516254563835e-06,
      "loss": 0.8861,
      "step": 10150
    },
    {
      "epoch": 2.1997196160897228,
      "grad_norm": 0.31330254673957825,
      "learning_rate": 8.778963542495015e-06,
      "loss": 0.8327,
      "step": 10200
    },
    {
      "epoch": 2.2105036126388438,
      "grad_norm": 0.3293406665325165,
      "learning_rate": 8.558609178255252e-06,
      "loss": 0.8567,
      "step": 10250
    },
    {
      "epoch": 2.221287609187965,
      "grad_norm": 0.3065802752971649,
      "learning_rate": 8.340483025259233e-06,
      "loss": 0.8515,
      "step": 10300
    },
    {
      "epoch": 2.232071605737086,
      "grad_norm": 0.2637750208377838,
      "learning_rate": 8.124614644944412e-06,
      "loss": 0.874,
      "step": 10350
    },
    {
      "epoch": 2.242855602286207,
      "grad_norm": 0.26482629776000977,
      "learning_rate": 7.911033292764774e-06,
      "loss": 0.8373,
      "step": 10400
    },
    {
      "epoch": 2.2536395988353286,
      "grad_norm": 0.27340102195739746,
      "learning_rate": 7.699767914225903e-06,
      "loss": 0.9063,
      "step": 10450
    },
    {
      "epoch": 2.2644235953844496,
      "grad_norm": 0.25882843136787415,
      "learning_rate": 7.490847140962273e-06,
      "loss": 0.8377,
      "step": 10500
    },
    {
      "epoch": 2.2752075919335706,
      "grad_norm": 0.3063746690750122,
      "learning_rate": 7.284299286856877e-06,
      "loss": 0.8767,
      "step": 10550
    },
    {
      "epoch": 2.2859915884826916,
      "grad_norm": 0.27114883065223694,
      "learning_rate": 7.080152344204028e-06,
      "loss": 0.8517,
      "step": 10600
    },
    {
      "epoch": 2.2967755850318126,
      "grad_norm": 0.26992297172546387,
      "learning_rate": 6.878433979915719e-06,
      "loss": 0.873,
      "step": 10650
    },
    {
      "epoch": 2.307559581580934,
      "grad_norm": 0.30842849612236023,
      "learning_rate": 6.6791715317721075e-06,
      "loss": 0.8645,
      "step": 10700
    },
    {
      "epoch": 2.318343578130055,
      "grad_norm": 0.2740515172481537,
      "learning_rate": 6.482392004716492e-06,
      "loss": 0.8772,
      "step": 10750
    },
    {
      "epoch": 2.329127574679176,
      "grad_norm": 0.28314441442489624,
      "learning_rate": 6.288122067195592e-06,
      "loss": 0.87,
      "step": 10800
    },
    {
      "epoch": 2.3399115712282974,
      "grad_norm": 0.2951704263687134,
      "learning_rate": 6.096388047545232e-06,
      "loss": 0.8801,
      "step": 10850
    },
    {
      "epoch": 2.3506955677774184,
      "grad_norm": 0.3134472966194153,
      "learning_rate": 5.907215930422244e-06,
      "loss": 0.8598,
      "step": 10900
    },
    {
      "epoch": 2.3614795643265394,
      "grad_norm": 0.3114987313747406,
      "learning_rate": 5.7206313532829095e-06,
      "loss": 0.8578,
      "step": 10950
    },
    {
      "epoch": 2.3722635608756604,
      "grad_norm": 0.3185006380081177,
      "learning_rate": 5.5366596029084535e-06,
      "loss": 0.8713,
      "step": 11000
    }
  ],
  "logging_steps": 50,
  "max_steps": 13911,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.923221364727559e+19,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}