{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.95890410958904, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1842.0, "completions/max_terminated_length": 1842.0, "completions/mean_length": 664.732177734375, "completions/mean_terminated_length": 664.732177734375, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 0.6624065637588501, "epoch": 0.00684931506849315, "frac_reward_zero_std": 0.0, "grad_norm": 0.9854248724622482, "kl": 0.0, "learning_rate": 5e-07, "loss": -0.0248, "num_tokens": 41993.0, "reward": 0.10325329005718231, "reward_std": 0.031403958797454834, "rewards/check_gptzero_func/mean": 0.10325329005718231, "rewards/check_gptzero_func/std": 0.22087055444717407, "sampling/importance_sampling_ratio/max": 1.587634563446045, "sampling/importance_sampling_ratio/mean": 1.0002282857894897, "sampling/importance_sampling_ratio/min": 0.4732460379600525, "sampling/sampling_logp_difference/max": 0.7481398582458496, "sampling/sampling_logp_difference/mean": 0.022896580398082733, "step": 1 }, { "clip_ratio/high_max": 0.0033848106395453215, "clip_ratio/high_mean": 0.0025745525490492582, "clip_ratio/low_mean": 0.0011688631493598223, "clip_ratio/low_min": 0.0005839415825903416, "clip_ratio/region_mean": 0.003743415931239724, "entropy": 0.6624065637588501, "epoch": 0.0136986301369863, "grad_norm": 1.0402763538729336, "kl": 0.000677596777677536, "learning_rate": 4.998287671232877e-07, "loss": -0.0247, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 531.0892944335938, "completions/mean_terminated_length": 531.0892944335938, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.6192032098770142, "epoch": 0.02054794520547945, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.1509063443243073, "kl": 0.0006423658924177289, "learning_rate": 4.996575342465753e-07, "loss": 0.006, "num_tokens": 76222.0, "reward": 0.2101871818304062, "reward_std": 0.06809505820274353, "rewards/check_gptzero_func/mean": 0.2101871818304062, "rewards/check_gptzero_func/std": 0.3083561956882477, "sampling/importance_sampling_ratio/max": 1.6167234182357788, "sampling/importance_sampling_ratio/mean": 0.9997578263282776, "sampling/importance_sampling_ratio/min": 0.6230837106704712, "sampling/sampling_logp_difference/max": 0.48040151596069336, "sampling/sampling_logp_difference/mean": 0.02211548201739788, "step": 3 }, { "clip_ratio/high_max": 0.00374531839042902, "clip_ratio/high_mean": 0.0022668836172670126, "clip_ratio/low_mean": 0.0012099641608074307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034768476616591215, "entropy": 0.6192032098770142, "epoch": 0.0273972602739726, "grad_norm": 1.081347057572592, "kl": 0.0006349833565764129, "learning_rate": 4.99486301369863e-07, "loss": 0.0059, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 559.9642944335938, "completions/mean_terminated_length": 559.9642944335938, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.6765018701553345, "epoch": 0.03424657534246575, "frac_reward_zero_std": 0.0, "grad_norm": 1.0379693878656595, "kl": 0.000742889940738678, "learning_rate": 4.993150684931507e-07, "loss": -0.0033, "num_tokens": 112454.0, "reward": 0.20098991692066193, "reward_std": 0.047937002032995224, "rewards/check_gptzero_func/mean": 0.20098991692066193, "rewards/check_gptzero_func/std": 0.370850145816803, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0002881288528442, "sampling/importance_sampling_ratio/min": 0.6254628896713257, "sampling/sampling_logp_difference/max": 0.8105227947235107, "sampling/sampling_logp_difference/mean": 0.023045962676405907, "step": 5 }, { "clip_ratio/high_max": 0.004216020926833153, "clip_ratio/high_mean": 0.002751729218289256, "clip_ratio/low_mean": 0.0011448924196884036, "clip_ratio/low_min": 0.0006022886955179274, "clip_ratio/region_mean": 0.0038966217543929815, "entropy": 0.6765018701553345, "epoch": 0.0410958904109589, "grad_norm": 1.0143248849030386, "kl": 0.0006744636921212077, "learning_rate": 4.991438356164384e-07, "loss": -0.0039, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 567.232177734375, "completions/mean_terminated_length": 567.232177734375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.6401202082633972, "epoch": 0.04794520547945205, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0106376355951083, "kl": 0.0006594850565306842, "learning_rate": 4.98972602739726e-07, "loss": -0.0208, "num_tokens": 148747.0, "reward": 0.2335750013589859, "reward_std": 0.01781732589006424, "rewards/check_gptzero_func/mean": 0.2335749864578247, "rewards/check_gptzero_func/std": 0.39848193526268005, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000311017036438, "sampling/importance_sampling_ratio/min": 0.1818014234304428, "sampling/sampling_logp_difference/max": 1.7048403024673462, "sampling/sampling_logp_difference/mean": 0.022539706900715828, "step": 7 }, { "clip_ratio/high_max": 0.0032082130201160908, "clip_ratio/high_mean": 0.0023291537072509527, "clip_ratio/low_mean": 0.0009475289843976498, "clip_ratio/low_min": 0.0001909125567181036, "clip_ratio/region_mean": 0.003276682924479246, "entropy": 0.6401202082633972, "epoch": 0.0547945205479452, "grad_norm": 0.9944975177303474, "kl": 0.0006385048036463559, "learning_rate": 4.988013698630137e-07, "loss": -0.021, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1392.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 574.0714721679688, "completions/mean_terminated_length": 574.0714721679688, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.6614893078804016, "epoch": 0.06164383561643835, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0693750050158035, "kl": 0.000690817425493151, "learning_rate": 4.986301369863014e-07, "loss": -0.0208, "num_tokens": 185671.0, "reward": 0.14055407047271729, "reward_std": 0.05344964936375618, "rewards/check_gptzero_func/mean": 0.14055407047271729, "rewards/check_gptzero_func/std": 0.2741428315639496, "sampling/importance_sampling_ratio/max": 1.4753236770629883, "sampling/importance_sampling_ratio/mean": 1.0000520944595337, "sampling/importance_sampling_ratio/min": 0.611565887928009, "sampling/sampling_logp_difference/max": 0.4917325973510742, "sampling/sampling_logp_difference/mean": 0.022914398461580276, "step": 9 }, { "clip_ratio/high_max": 0.003518648911267519, "clip_ratio/high_mean": 0.002555683720856905, "clip_ratio/low_mean": 0.0011473775375634432, "clip_ratio/low_min": 0.00022956840984988958, "clip_ratio/region_mean": 0.003703061491250992, "entropy": 0.6604593396186829, "epoch": 0.0684931506849315, "grad_norm": 1.840583921738103, "kl": 0.0006804120494052768, "learning_rate": 4.98458904109589e-07, "loss": -0.0215, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2293.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 607.2142944335938, "completions/mean_terminated_length": 607.2142944335938, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.6613644361495972, "epoch": 0.07534246575342465, "frac_reward_zero_std": 0.0, "grad_norm": 0.9864855771982752, "kl": 0.0006600932101719081, "learning_rate": 4.982876712328767e-07, "loss": -0.0206, "num_tokens": 224823.0, "reward": 0.19944587349891663, "reward_std": 0.038437020033597946, "rewards/check_gptzero_func/mean": 0.19944585859775543, "rewards/check_gptzero_func/std": 0.3549065887928009, "sampling/importance_sampling_ratio/max": 1.5862442255020142, "sampling/importance_sampling_ratio/mean": 1.000382661819458, "sampling/importance_sampling_ratio/min": 0.5434082746505737, "sampling/sampling_logp_difference/max": 0.6098943948745728, "sampling/sampling_logp_difference/mean": 0.022032376378774643, "step": 11 }, { "clip_ratio/high_max": 0.004174493718892336, "clip_ratio/high_mean": 0.0027722071390599012, "clip_ratio/low_mean": 0.0009644197998568416, "clip_ratio/low_min": 0.0005885814898647368, "clip_ratio/region_mean": 0.0037366270553320646, "entropy": 0.6613644361495972, "epoch": 0.0821917808219178, "grad_norm": 0.967419227878006, "kl": 0.0007064800010994077, "learning_rate": 4.981164383561644e-07, "loss": -0.021, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1548.0, "completions/max_terminated_length": 1548.0, "completions/mean_length": 551.5, "completions/mean_terminated_length": 551.5, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.7070584893226624, "epoch": 0.08904109589041095, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.1283988566469445, "kl": 0.0007686114986427128, "learning_rate": 4.97945205479452e-07, "loss": -0.0047, "num_tokens": 260683.0, "reward": 0.22322571277618408, "reward_std": 0.14387324452400208, "rewards/check_gptzero_func/mean": 0.22322571277618408, "rewards/check_gptzero_func/std": 0.33479437232017517, "sampling/importance_sampling_ratio/max": 1.6301530599594116, "sampling/importance_sampling_ratio/mean": 1.0001825094223022, "sampling/importance_sampling_ratio/min": 0.612243115901947, "sampling/sampling_logp_difference/max": 0.49062585830688477, "sampling/sampling_logp_difference/mean": 0.023247171193361282, "step": 13 }, { "clip_ratio/high_max": 0.004252125974744558, "clip_ratio/high_mean": 0.002528582001104951, "clip_ratio/low_mean": 0.0011564461747184396, "clip_ratio/low_min": 0.0007897334871813655, "clip_ratio/region_mean": 0.003685027826577425, "entropy": 0.7070584893226624, "epoch": 0.0958904109589041, "grad_norm": 1.1004889827761986, "kl": 0.0006948460941202939, "learning_rate": 4.977739726027397e-07, "loss": -0.0053, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1856.0, "completions/max_terminated_length": 1856.0, "completions/mean_length": 662.9285888671875, "completions/mean_terminated_length": 662.9285888671875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.696018397808075, "epoch": 0.10273972602739725, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 0.9437782155290143, "kl": 0.0007317033596336842, "learning_rate": 4.976027397260274e-07, "loss": -0.0111, "num_tokens": 302405.0, "reward": 0.2522435784339905, "reward_std": 0.05633928254246712, "rewards/check_gptzero_func/mean": 0.2522435784339905, "rewards/check_gptzero_func/std": 0.38217589259147644, "sampling/importance_sampling_ratio/max": 1.7767271995544434, "sampling/importance_sampling_ratio/mean": 0.9994630813598633, "sampling/importance_sampling_ratio/min": 0.4017345905303955, "sampling/sampling_logp_difference/max": 0.911963701248169, "sampling/sampling_logp_difference/mean": 0.023746104910969734, "step": 15 }, { "clip_ratio/high_max": 0.0036496350076049566, "clip_ratio/high_mean": 0.002855118131265044, "clip_ratio/low_mean": 0.0007740295841358602, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036291477736085653, "entropy": 0.696018397808075, "epoch": 0.1095890410958904, "grad_norm": 0.933765951716016, "kl": 0.0007088271086104214, "learning_rate": 4.974315068493151e-07, "loss": -0.0116, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2262.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 724.732177734375, "completions/mean_terminated_length": 724.732177734375, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "entropy": 0.6474251747131348, "epoch": 0.11643835616438356, "frac_reward_zero_std": 0.0, "grad_norm": 0.9430783545473115, "kl": 0.000837849045637995, "learning_rate": 4.972602739726027e-07, "loss": -0.0054, "num_tokens": 348280.0, "reward": 0.12453384697437286, "reward_std": 0.08048289269208908, "rewards/check_gptzero_func/mean": 0.12453383952379227, "rewards/check_gptzero_func/std": 0.19290240108966827, "sampling/importance_sampling_ratio/max": 1.6007516384124756, "sampling/importance_sampling_ratio/mean": 1.0004312992095947, "sampling/importance_sampling_ratio/min": 0.6281402707099915, "sampling/sampling_logp_difference/max": 0.4704732894897461, "sampling/sampling_logp_difference/mean": 0.023183511570096016, "step": 17 }, { "clip_ratio/high_max": 0.004432405810803175, "clip_ratio/high_mean": 0.0032112966291606426, "clip_ratio/low_mean": 0.0012506352504715323, "clip_ratio/low_min": 0.000586166454013437, "clip_ratio/region_mean": 0.004461931996047497, "entropy": 0.6463571190834045, "epoch": 0.1232876712328767, "grad_norm": 0.9232941755384517, "kl": 0.0007626870647072792, "learning_rate": 4.970890410958904e-07, "loss": -0.0062, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 617.9107666015625, "completions/mean_terminated_length": 617.9107666015625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.6568394303321838, "epoch": 0.13013698630136986, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9865388518548479, "kl": 0.0008927286835387349, "learning_rate": 4.969178082191781e-07, "loss": 0.0142, "num_tokens": 387709.0, "reward": 0.21184030175209045, "reward_std": 0.04597683250904083, "rewards/check_gptzero_func/mean": 0.21184028685092926, "rewards/check_gptzero_func/std": 0.3479672372341156, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.999893307685852, "sampling/importance_sampling_ratio/min": 0.4769994020462036, "sampling/sampling_logp_difference/max": 0.7402400970458984, "sampling/sampling_logp_difference/mean": 0.02230406180024147, "step": 19 }, { "clip_ratio/high_max": 0.004864864982664585, "clip_ratio/high_mean": 0.002593354554846883, "clip_ratio/low_mean": 0.0011987619800493121, "clip_ratio/low_min": 0.00016189088637474924, "clip_ratio/region_mean": 0.0037921161856502295, "entropy": 0.6553544998168945, "epoch": 0.136986301369863, "grad_norm": 0.9558422717230309, "kl": 0.0007934986497275531, "learning_rate": 4.967465753424657e-07, "loss": 0.0139, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 594.8392944335938, "completions/mean_terminated_length": 594.8392944335938, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "entropy": 0.6729802489280701, "epoch": 0.14383561643835616, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.0222075552631416, "kl": 0.0008601789595559239, "learning_rate": 4.965753424657534e-07, "loss": -0.0266, "num_tokens": 425536.0, "reward": 0.13307321071624756, "reward_std": 0.031427957117557526, "rewards/check_gptzero_func/mean": 0.13307321071624756, "rewards/check_gptzero_func/std": 0.3023197650909424, "sampling/importance_sampling_ratio/max": 1.6004769802093506, "sampling/importance_sampling_ratio/mean": 1.0003774166107178, "sampling/importance_sampling_ratio/min": 0.5483719110488892, "sampling/sampling_logp_difference/max": 0.6008014678955078, "sampling/sampling_logp_difference/mean": 0.022904226556420326, "step": 21 }, { "clip_ratio/high_max": 0.003737085033208132, "clip_ratio/high_mean": 0.002700899029150605, "clip_ratio/low_mean": 0.0008782556978985667, "clip_ratio/low_min": 0.0004914004821330309, "clip_ratio/region_mean": 0.00357915461063385, "entropy": 0.6729802489280701, "epoch": 0.1506849315068493, "grad_norm": 0.9944042226245553, "kl": 0.0008842604584060609, "learning_rate": 4.96404109589041e-07, "loss": -0.0275, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 672.1607666015625, "completions/mean_terminated_length": 672.1607666015625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "entropy": 0.6572409868240356, "epoch": 0.15753424657534246, "frac_reward_zero_std": 0.0, "grad_norm": 0.9839549045173749, "kl": 0.0010974082397297025, "learning_rate": 4.962328767123287e-07, "loss": 0.0108, "num_tokens": 468009.0, "reward": 0.10441223531961441, "reward_std": 0.04066242650151253, "rewards/check_gptzero_func/mean": 0.10441223531961441, "rewards/check_gptzero_func/std": 0.25216054916381836, "sampling/importance_sampling_ratio/max": 1.5072870254516602, "sampling/importance_sampling_ratio/mean": 0.9999232292175293, "sampling/importance_sampling_ratio/min": 0.47003254294395447, "sampling/sampling_logp_difference/max": 0.7549533843994141, "sampling/sampling_logp_difference/mean": 0.02294806018471718, "step": 23 }, { "clip_ratio/high_max": 0.0036191537510603666, "clip_ratio/high_mean": 0.0027465156745165586, "clip_ratio/low_mean": 0.0014141456922516227, "clip_ratio/low_min": 0.0005433011101558805, "clip_ratio/region_mean": 0.0041606612503528595, "entropy": 0.6572409868240356, "epoch": 0.1643835616438356, "grad_norm": 0.9649448554334126, "kl": 0.0009848165791481733, "learning_rate": 4.960616438356164e-07, "loss": 0.0103, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 689.4642944335938, "completions/mean_terminated_length": 689.4642944335938, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "entropy": 0.6990359425544739, "epoch": 0.17123287671232876, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 0.982751957581329, "kl": 0.0010932879522442818, "learning_rate": 4.958904109589041e-07, "loss": -0.0075, "num_tokens": 511565.0, "reward": 0.16849955916404724, "reward_std": 0.04179370030760765, "rewards/check_gptzero_func/mean": 0.16849954426288605, "rewards/check_gptzero_func/std": 0.3155466318130493, "sampling/importance_sampling_ratio/max": 1.817976951599121, "sampling/importance_sampling_ratio/mean": 0.9996775388717651, "sampling/importance_sampling_ratio/min": 0.6267204880714417, "sampling/sampling_logp_difference/max": 0.5977243185043335, "sampling/sampling_logp_difference/mean": 0.023188067600131035, "step": 25 }, { "clip_ratio/high_max": 0.0038997214287519455, "clip_ratio/high_mean": 0.002720348769798875, "clip_ratio/low_mean": 0.00121069245506078, "clip_ratio/low_min": 0.0005331911379471421, "clip_ratio/region_mean": 0.003931041341274977, "entropy": 0.6990359425544739, "epoch": 0.1780821917808219, "grad_norm": 0.9860246938193017, "kl": 0.00104226463008672, "learning_rate": 4.957191780821918e-07, "loss": -0.0086, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 420.4464416503906, "completions/mean_terminated_length": 420.4464416503906, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "entropy": 0.5936406254768372, "epoch": 0.18493150684931506, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.1711126186633312, "kl": 0.0011783599620684981, "learning_rate": 4.955479452054794e-07, "loss": -0.0184, "num_tokens": 539890.0, "reward": 0.28918999433517456, "reward_std": 0.044583000242710114, "rewards/check_gptzero_func/mean": 0.28918999433517456, "rewards/check_gptzero_func/std": 0.4131961464881897, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000464677810669, "sampling/importance_sampling_ratio/min": 0.6267319917678833, "sampling/sampling_logp_difference/max": 0.8106875419616699, "sampling/sampling_logp_difference/mean": 0.021295608952641487, "step": 27 }, { "clip_ratio/high_max": 0.006398193538188934, "clip_ratio/high_mean": 0.002636041957885027, "clip_ratio/low_mean": 0.001462540472857654, "clip_ratio/low_min": 0.0006732873152941465, "clip_ratio/region_mean": 0.004098582547158003, "entropy": 0.5929814577102661, "epoch": 0.1917808219178082, "grad_norm": 1.1384163643758904, "kl": 0.0010130192385986447, "learning_rate": 4.953767123287671e-07, "loss": -0.0195, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1615.0, "completions/max_terminated_length": 1615.0, "completions/mean_length": 567.3928833007812, "completions/mean_terminated_length": 567.3928833007812, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "entropy": 0.6444209814071655, "epoch": 0.19863013698630136, "frac_reward_zero_std": 0.0, "grad_norm": 1.1278627436815056, "kl": 0.0011097121750935912, "learning_rate": 4.952054794520547e-07, "loss": -0.0465, "num_tokens": 576284.0, "reward": 0.11113985627889633, "reward_std": 0.0605621375143528, "rewards/check_gptzero_func/mean": 0.11113985627889633, "rewards/check_gptzero_func/std": 0.1988850235939026, "sampling/importance_sampling_ratio/max": 1.6088849306106567, "sampling/importance_sampling_ratio/mean": 1.0001180171966553, "sampling/importance_sampling_ratio/min": 0.5670191645622253, "sampling/sampling_logp_difference/max": 0.5673620700836182, "sampling/sampling_logp_difference/mean": 0.022898318246006966, "step": 29 }, { "clip_ratio/high_max": 0.004884402267634869, "clip_ratio/high_mean": 0.00308835762552917, "clip_ratio/low_mean": 0.0012085993075743318, "clip_ratio/low_min": 0.0002978850097861141, "clip_ratio/region_mean": 0.004296957049518824, "entropy": 0.6447930932044983, "epoch": 0.2054794520547945, "grad_norm": 1.0760923572839503, "kl": 0.000988772138953209, "learning_rate": 4.950342465753424e-07, "loss": -0.0477, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 623.3392944335938, "completions/mean_terminated_length": 623.3392944335938, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "entropy": 0.6836190223693848, "epoch": 0.21232876712328766, "frac_reward_zero_std": 0.0, "grad_norm": 1.0373619409403918, "kl": 0.0012793418718501925, "learning_rate": 4.948630136986301e-07, "loss": -0.0051, "num_tokens": 616389.0, "reward": 0.13934187591075897, "reward_std": 0.05689322203397751, "rewards/check_gptzero_func/mean": 0.13934186100959778, "rewards/check_gptzero_func/std": 0.2030991017818451, "sampling/importance_sampling_ratio/max": 1.5075865983963013, "sampling/importance_sampling_ratio/mean": 0.9995235800743103, "sampling/importance_sampling_ratio/min": 0.6267083883285522, "sampling/sampling_logp_difference/max": 0.4672739505767822, "sampling/sampling_logp_difference/mean": 0.023505205288529396, "step": 31 }, { "clip_ratio/high_max": 0.0037497070152312517, "clip_ratio/high_mean": 0.0029490862507373095, "clip_ratio/low_mean": 0.0013899157056584954, "clip_ratio/low_min": 0.000556096201762557, "clip_ratio/region_mean": 0.004339002072811127, "entropy": 0.6841988563537598, "epoch": 0.2191780821917808, "grad_norm": 1.02077201576387, "kl": 0.0011510689510032535, "learning_rate": 4.946917808219177e-07, "loss": -0.006, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 641.2678833007812, "completions/mean_terminated_length": 641.2678833007812, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.6465054154396057, "epoch": 0.22602739726027396, "frac_reward_zero_std": 0.0, "grad_norm": 1.0396435453503725, "kl": 0.0011950855841860175, "learning_rate": 4.945205479452055e-07, "loss": -0.0027, "num_tokens": 657578.0, "reward": 0.1270533800125122, "reward_std": 0.03636755049228668, "rewards/check_gptzero_func/mean": 0.1270533800125122, "rewards/check_gptzero_func/std": 0.27682268619537354, "sampling/importance_sampling_ratio/max": 1.7253142595291138, "sampling/importance_sampling_ratio/mean": 0.9996784925460815, "sampling/importance_sampling_ratio/min": 0.4953335225582123, "sampling/sampling_logp_difference/max": 0.702523946762085, "sampling/sampling_logp_difference/mean": 0.022419389337301254, "step": 33 }, { "clip_ratio/high_max": 0.00413874676451087, "clip_ratio/high_mean": 0.002831868827342987, "clip_ratio/low_mean": 0.0015850607305765152, "clip_ratio/low_min": 0.0007415647269226611, "clip_ratio/region_mean": 0.004416929092258215, "entropy": 0.6456454396247864, "epoch": 0.2328767123287671, "grad_norm": 1.018336716568673, "kl": 0.0011227106442674994, "learning_rate": 4.943493150684931e-07, "loss": -0.004, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 533.0892944335938, "completions/mean_terminated_length": 533.0892944335938, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.6057087182998657, "epoch": 0.23972602739726026, "frac_reward_zero_std": 0.0, "grad_norm": 1.1345205119231363, "kl": 0.0017548695905134082, "learning_rate": 4.941780821917808e-07, "loss": -0.0304, "num_tokens": 692277.0, "reward": 0.14230269193649292, "reward_std": 0.09505175799131393, "rewards/check_gptzero_func/mean": 0.1423027068376541, "rewards/check_gptzero_func/std": 0.2526035010814667, "sampling/importance_sampling_ratio/max": 1.5612157583236694, "sampling/importance_sampling_ratio/mean": 1.0001230239868164, "sampling/importance_sampling_ratio/min": 0.5925313830375671, "sampling/sampling_logp_difference/max": 0.5233514308929443, "sampling/sampling_logp_difference/mean": 0.022182662039995193, "step": 35 }, { "clip_ratio/high_max": 0.00614181999117136, "clip_ratio/high_mean": 0.003327219979837537, "clip_ratio/low_mean": 0.0009940107120200992, "clip_ratio/low_min": 0.0006402048747986555, "clip_ratio/region_mean": 0.004321230109781027, "entropy": 0.6064973473548889, "epoch": 0.2465753424657534, "grad_norm": 1.0800981052595167, "kl": 0.0021832308266311884, "learning_rate": 4.940068493150684e-07, "loss": -0.0315, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1588.0, "completions/max_terminated_length": 1588.0, "completions/mean_length": 706.5535888671875, "completions/mean_terminated_length": 706.5535888671875, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "entropy": 0.664795458316803, "epoch": 0.2534246575342466, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9010803188884506, "kl": 0.0020640019793063402, "learning_rate": 4.938356164383561e-07, "loss": -0.0026, "num_tokens": 737058.0, "reward": 0.14924554526805878, "reward_std": 0.04088061302900314, "rewards/check_gptzero_func/mean": 0.14924553036689758, "rewards/check_gptzero_func/std": 0.29761120676994324, "sampling/importance_sampling_ratio/max": 1.8839212656021118, "sampling/importance_sampling_ratio/mean": 0.9999674558639526, "sampling/importance_sampling_ratio/min": 0.5516126751899719, "sampling/sampling_logp_difference/max": 0.6333553791046143, "sampling/sampling_logp_difference/mean": 0.022508051246404648, "step": 37 }, { "clip_ratio/high_max": 0.0041877506300807, "clip_ratio/high_mean": 0.00259722163900733, "clip_ratio/low_mean": 0.0009633555309846997, "clip_ratio/low_min": 0.00045599634177051485, "clip_ratio/region_mean": 0.003560577053576708, "entropy": 0.6642522811889648, "epoch": 0.2602739726027397, "grad_norm": 0.8738971807228483, "kl": 0.001963850809261203, "learning_rate": 4.936643835616438e-07, "loss": -0.0038, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1083.0, "completions/max_terminated_length": 1083.0, "completions/mean_length": 516.8392944335938, "completions/mean_terminated_length": 516.8392944335938, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5412200093269348, "epoch": 0.2671232876712329, "frac_reward_zero_std": 1.0, "grad_norm": 0.014033593827429853, "kl": 0.0023920845706015825, "learning_rate": 4.934931506849314e-07, "loss": 0.0, "num_tokens": 771957.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_gptzero_func/mean": 0.0, "rewards/check_gptzero_func/std": 0.0, "sampling/importance_sampling_ratio/max": 1.69455885887146, "sampling/importance_sampling_ratio/mean": 0.999897837638855, "sampling/importance_sampling_ratio/min": 0.5995684862136841, "sampling/sampling_logp_difference/max": 0.5274224281311035, "sampling/sampling_logp_difference/mean": 0.019024215638637543, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "entropy": 0.541813313961029, "epoch": 0.273972602739726, "grad_norm": 0.013882169264202418, "kl": 0.0024999286979436874, "learning_rate": 4.933219178082191e-07, "loss": 0.0, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1548.0, "completions/max_terminated_length": 1548.0, "completions/mean_length": 617.6428833007812, "completions/mean_terminated_length": 617.6428833007812, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.6394982933998108, "epoch": 0.2808219178082192, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0538399144722606, "kl": 0.003311266889795661, "learning_rate": 4.931506849315068e-07, "loss": 0.0144, "num_tokens": 811407.0, "reward": 0.20052021741867065, "reward_std": 0.04475821182131767, "rewards/check_gptzero_func/mean": 0.20052020251750946, "rewards/check_gptzero_func/std": 0.33382534980773926, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0003221035003662, "sampling/importance_sampling_ratio/min": 0.6218421459197998, "sampling/sampling_logp_difference/max": 0.7299280166625977, "sampling/sampling_logp_difference/mean": 0.022428028285503387, "step": 41 }, { "clip_ratio/high_max": 0.004294917453080416, "clip_ratio/high_mean": 0.0028143117669969797, "clip_ratio/low_mean": 0.0018043611198663712, "clip_ratio/low_min": 0.0008799718343652785, "clip_ratio/region_mean": 0.0046186731196939945, "entropy": 0.6390891671180725, "epoch": 0.2876712328767123, "grad_norm": 1.0085700868626455, "kl": 0.002815553219988942, "learning_rate": 4.929794520547945e-07, "loss": 0.0127, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 593.982177734375, "completions/mean_terminated_length": 593.982177734375, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.6429600715637207, "epoch": 0.2945205479452055, "frac_reward_zero_std": 0.0, "grad_norm": 1.0803369335363933, "kl": 0.0031404283363372087, "learning_rate": 4.928082191780821e-07, "loss": 0.0159, "num_tokens": 849142.0, "reward": 0.15313172340393066, "reward_std": 0.06781160831451416, "rewards/check_gptzero_func/mean": 0.15313172340393066, "rewards/check_gptzero_func/std": 0.2866833508014679, "sampling/importance_sampling_ratio/max": 1.776042103767395, "sampling/importance_sampling_ratio/mean": 0.9995749592781067, "sampling/importance_sampling_ratio/min": 0.39706334471702576, "sampling/sampling_logp_difference/max": 0.9236595034599304, "sampling/sampling_logp_difference/mean": 0.022439327090978622, "step": 43 }, { "clip_ratio/high_max": 0.004273504484444857, "clip_ratio/high_mean": 0.0026710222009569407, "clip_ratio/low_mean": 0.001637372886762023, "clip_ratio/low_min": 0.0006798096583224833, "clip_ratio/region_mean": 0.004308394622057676, "entropy": 0.6418050527572632, "epoch": 0.3013698630136986, "grad_norm": 1.0261699171323604, "kl": 0.0029596316162496805, "learning_rate": 4.926369863013698e-07, "loss": 0.0143, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 542.6785888671875, "completions/mean_terminated_length": 542.6785888671875, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.6867976784706116, "epoch": 0.3082191780821918, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.154107005194104, "kl": 0.0035767932422459126, "learning_rate": 4.924657534246575e-07, "loss": 0.0018, "num_tokens": 885030.0, "reward": 0.1847611367702484, "reward_std": 0.03608318418264389, "rewards/check_gptzero_func/mean": 0.1847611516714096, "rewards/check_gptzero_func/std": 0.32559606432914734, "sampling/importance_sampling_ratio/max": 1.7259352207183838, "sampling/importance_sampling_ratio/mean": 0.9999464750289917, "sampling/importance_sampling_ratio/min": 0.495414674282074, "sampling/sampling_logp_difference/max": 0.7023601531982422, "sampling/sampling_logp_difference/mean": 0.022967509925365448, "step": 45 }, { "clip_ratio/high_max": 0.0039024390280246735, "clip_ratio/high_mean": 0.003184092929586768, "clip_ratio/low_mean": 0.001760590705089271, "clip_ratio/low_min": 0.00047744091716594994, "clip_ratio/region_mean": 0.00494468305259943, "entropy": 0.6867976784706116, "epoch": 0.3150684931506849, "grad_norm": 1.1053890857447555, "kl": 0.0033275808673352003, "learning_rate": 4.922945205479451e-07, "loss": -0.0001, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1680.0, "completions/max_terminated_length": 1680.0, "completions/mean_length": 683.732177734375, "completions/mean_terminated_length": 683.732177734375, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.6644364595413208, "epoch": 0.3219178082191781, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.009483787982477, "kl": 0.00411708140745759, "learning_rate": 4.921232876712328e-07, "loss": -0.0146, "num_tokens": 928401.0, "reward": 0.19243761897087097, "reward_std": 0.07404158264398575, "rewards/check_gptzero_func/mean": 0.19243761897087097, "rewards/check_gptzero_func/std": 0.2956378161907196, "sampling/importance_sampling_ratio/max": 1.9693632125854492, "sampling/importance_sampling_ratio/mean": 1.0001431703567505, "sampling/importance_sampling_ratio/min": 0.3859553039073944, "sampling/sampling_logp_difference/max": 0.9520337581634521, "sampling/sampling_logp_difference/mean": 0.023075612261891365, "step": 47 }, { "clip_ratio/high_max": 0.003946329932659864, "clip_ratio/high_mean": 0.003057846101000905, "clip_ratio/low_mean": 0.001286154380068183, "clip_ratio/low_min": 0.000631412782240659, "clip_ratio/region_mean": 0.004344000481069088, "entropy": 0.6639639735221863, "epoch": 0.3287671232876712, "grad_norm": 0.989617267387514, "kl": 0.0038883991073817015, "learning_rate": 4.919520547945206e-07, "loss": -0.0163, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 620.8214721679688, "completions/mean_terminated_length": 620.8214721679688, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.6441218256950378, "epoch": 0.3356164383561644, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0321760822732853, "kl": 0.0038268009666353464, "learning_rate": 4.917808219178081e-07, "loss": -0.0061, "num_tokens": 967709.0, "reward": 0.27959737181663513, "reward_std": 0.04418365657329559, "rewards/check_gptzero_func/mean": 0.27959737181663513, "rewards/check_gptzero_func/std": 0.3830104172229767, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0000200271606445, "sampling/importance_sampling_ratio/min": 0.46553704142570496, "sampling/sampling_logp_difference/max": 0.9890339374542236, "sampling/sampling_logp_difference/mean": 0.02230379916727543, "step": 49 }, { "clip_ratio/high_max": 0.005151515360921621, "clip_ratio/high_mean": 0.002983740298077464, "clip_ratio/low_mean": 0.0013056638417765498, "clip_ratio/low_min": 0.0005087332683615386, "clip_ratio/region_mean": 0.004289404489099979, "entropy": 0.6433120965957642, "epoch": 0.3424657534246575, "grad_norm": 0.9947223005038776, "kl": 0.004071378614753485, "learning_rate": 4.916095890410959e-07, "loss": -0.0077, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1790.0, "completions/max_terminated_length": 1790.0, "completions/mean_length": 760.1428833007812, "completions/mean_terminated_length": 760.1428833007812, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.6986328959465027, "epoch": 0.3493150684931507, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0088085669376636, "kl": 0.004396843258291483, "learning_rate": 4.914383561643836e-07, "loss": 0.0015, "num_tokens": 1015281.0, "reward": 0.17380915582180023, "reward_std": 0.05375532805919647, "rewards/check_gptzero_func/mean": 0.17380915582180023, "rewards/check_gptzero_func/std": 0.3028184175491333, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.999697208404541, "sampling/importance_sampling_ratio/min": 0.48710179328918457, "sampling/sampling_logp_difference/max": 2.140697479248047, "sampling/sampling_logp_difference/mean": 0.023295851424336433, "step": 51 }, { "clip_ratio/high_max": 0.005134400445967913, "clip_ratio/high_mean": 0.002996662398800254, "clip_ratio/low_mean": 0.0010972967138513923, "clip_ratio/low_min": 0.0006519967573694885, "clip_ratio/region_mean": 0.004093958996236324, "entropy": 0.698287844657898, "epoch": 0.3561643835616438, "grad_norm": 1.0049811790405436, "kl": 0.00405140221118927, "learning_rate": 4.912671232876713e-07, "loss": -0.0003, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2173.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 645.4464721679688, "completions/mean_terminated_length": 645.4464721679688, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.6849231719970703, "epoch": 0.363013698630137, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3140679657695893, "kl": 0.003894540946930647, "learning_rate": 4.910958904109589e-07, "loss": -0.0136, "num_tokens": 1056100.0, "reward": 0.22083687782287598, "reward_std": 0.048538271337747574, "rewards/check_gptzero_func/mean": 0.22083686292171478, "rewards/check_gptzero_func/std": 0.355166494846344, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0002272129058838, "sampling/importance_sampling_ratio/min": 0.5686317086219788, "sampling/sampling_logp_difference/max": 0.8869340419769287, "sampling/sampling_logp_difference/mean": 0.022704031318426132, "step": 53 }, { "clip_ratio/high_max": 0.0050231837667524815, "clip_ratio/high_mean": 0.0033103046007454395, "clip_ratio/low_mean": 0.001185024157166481, "clip_ratio/low_min": 0.0006509357481263578, "clip_ratio/region_mean": 0.004495329223573208, "entropy": 0.685555636882782, "epoch": 0.3698630136986301, "grad_norm": 0.9900045898800945, "kl": 0.004090186674147844, "learning_rate": 4.909246575342466e-07, "loss": -0.0152, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 768.9464721679688, "completions/mean_terminated_length": 768.9464721679688, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "entropy": 0.6904276609420776, "epoch": 0.3767123287671233, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.8866221271270188, "kl": 0.0038594731595367193, "learning_rate": 4.907534246575343e-07, "loss": 0.0005, "num_tokens": 1104237.0, "reward": 0.16240032017230988, "reward_std": 0.05175558105111122, "rewards/check_gptzero_func/mean": 0.16240032017230988, "rewards/check_gptzero_func/std": 0.2842916250228882, "sampling/importance_sampling_ratio/max": 1.6011669635772705, "sampling/importance_sampling_ratio/mean": 0.9997668266296387, "sampling/importance_sampling_ratio/min": 0.6118178963661194, "sampling/sampling_logp_difference/max": 0.4913206100463867, "sampling/sampling_logp_difference/mean": 0.023155955597758293, "step": 55 }, { "clip_ratio/high_max": 0.0038701994344592094, "clip_ratio/high_mean": 0.003147991606965661, "clip_ratio/low_mean": 0.0009888341883197427, "clip_ratio/low_min": 0.00020977553504053503, "clip_ratio/region_mean": 0.0041368259117007256, "entropy": 0.6904276609420776, "epoch": 0.3835616438356164, "grad_norm": 0.8654601817463383, "kl": 0.0038540817331522703, "learning_rate": 4.905821917808219e-07, "loss": -0.0012, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2014.0, "completions/max_terminated_length": 2014.0, "completions/mean_length": 633.7678833007812, "completions/mean_terminated_length": 633.7678833007812, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.6680780053138733, "epoch": 0.3904109589041096, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0018073661483344, "kl": 0.005151956342160702, "learning_rate": 4.904109589041096e-07, "loss": -0.01, "num_tokens": 1144446.0, "reward": 0.2535025477409363, "reward_std": 0.07743978500366211, "rewards/check_gptzero_func/mean": 0.2535025477409363, "rewards/check_gptzero_func/std": 0.3884026110172272, "sampling/importance_sampling_ratio/max": 1.5756206512451172, "sampling/importance_sampling_ratio/mean": 0.999597430229187, "sampling/importance_sampling_ratio/min": 0.4838891327381134, "sampling/sampling_logp_difference/max": 0.7258994579315186, "sampling/sampling_logp_difference/mean": 0.02228192798793316, "step": 57 }, { "clip_ratio/high_max": 0.005239991471171379, "clip_ratio/high_mean": 0.0036126081831753254, "clip_ratio/low_mean": 0.0016954370075836778, "clip_ratio/low_min": 0.001491547911427915, "clip_ratio/region_mean": 0.005308046005666256, "entropy": 0.6696681380271912, "epoch": 0.3972602739726027, "grad_norm": 0.9547859435945957, "kl": 0.005825448781251907, "learning_rate": 4.902397260273973e-07, "loss": -0.0119, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 742.7142944335938, "completions/mean_terminated_length": 742.7142944335938, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.6784703135490417, "epoch": 0.4041095890410959, "frac_reward_zero_std": 0.0, "grad_norm": 1.0053119499856833, "kl": 0.006023472640663385, "learning_rate": 4.90068493150685e-07, "loss": 0.0077, "num_tokens": 1190944.0, "reward": 0.10585577040910721, "reward_std": 0.042458731681108475, "rewards/check_gptzero_func/mean": 0.10585576295852661, "rewards/check_gptzero_func/std": 0.2394891232252121, "sampling/importance_sampling_ratio/max": 1.5969221591949463, "sampling/importance_sampling_ratio/mean": 0.9998756647109985, "sampling/importance_sampling_ratio/min": 0.4208536446094513, "sampling/sampling_logp_difference/max": 0.8654701709747314, "sampling/sampling_logp_difference/mean": 0.02306969091296196, "step": 59 }, { "clip_ratio/high_max": 0.0037647902499884367, "clip_ratio/high_mean": 0.0027994359843432903, "clip_ratio/low_mean": 0.0013811250682920218, "clip_ratio/low_min": 0.0007598784286528826, "clip_ratio/region_mean": 0.004180560819804668, "entropy": 0.6780605316162109, "epoch": 0.410958904109589, "grad_norm": 0.9021931845508864, "kl": 0.005869701504707336, "learning_rate": 4.898972602739726e-07, "loss": 0.0061, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1584.0, "completions/max_terminated_length": 1584.0, "completions/mean_length": 572.2857666015625, "completions/mean_terminated_length": 572.2857666015625, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.64687579870224, "epoch": 0.4178082191780822, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.1272567187923517, "kl": 0.00625581806525588, "learning_rate": 4.897260273972603e-07, "loss": 0.016, "num_tokens": 1227474.0, "reward": 0.3151547610759735, "reward_std": 0.052352044731378555, "rewards/check_gptzero_func/mean": 0.3151547312736511, "rewards/check_gptzero_func/std": 0.3952562212944031, "sampling/importance_sampling_ratio/max": 1.9442535638809204, "sampling/importance_sampling_ratio/mean": 1.0000123977661133, "sampling/importance_sampling_ratio/min": 0.2788597047328949, "sampling/sampling_logp_difference/max": 1.2770464420318604, "sampling/sampling_logp_difference/mean": 0.022612636908888817, "step": 61 }, { "clip_ratio/high_max": 0.005116725340485573, "clip_ratio/high_mean": 0.0038332000840455294, "clip_ratio/low_mean": 0.0015817114617675543, "clip_ratio/low_min": 0.000631578965112567, "clip_ratio/region_mean": 0.005414911545813084, "entropy": 0.64687579870224, "epoch": 0.4246575342465753, "grad_norm": 1.0908301852651188, "kl": 0.006817488931119442, "learning_rate": 4.89554794520548e-07, "loss": 0.0139, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1859.0, "completions/max_terminated_length": 1859.0, "completions/mean_length": 637.5714721679688, "completions/mean_terminated_length": 637.5714721679688, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.644902229309082, "epoch": 0.4315068493150685, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9767453782933141, "kl": 0.007089635822921991, "learning_rate": 4.893835616438356e-07, "loss": -0.0019, "num_tokens": 1267722.0, "reward": 0.2532567083835602, "reward_std": 0.08535689860582352, "rewards/check_gptzero_func/mean": 0.2532567083835602, "rewards/check_gptzero_func/std": 0.35002613067626953, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9998102188110352, "sampling/importance_sampling_ratio/min": 0.20108339190483093, "sampling/sampling_logp_difference/max": 1.6040356159210205, "sampling/sampling_logp_difference/mean": 0.022142896428704262, "step": 63 }, { "clip_ratio/high_max": 0.0035001030191779137, "clip_ratio/high_mean": 0.002767337253317237, "clip_ratio/low_mean": 0.0018000273266807199, "clip_ratio/low_min": 0.0014757424360141158, "clip_ratio/region_mean": 0.004567364696413279, "entropy": 0.6457486748695374, "epoch": 0.4383561643835616, "grad_norm": 0.9400426106474881, "kl": 0.00732081662863493, "learning_rate": 4.892123287671233e-07, "loss": -0.0037, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 582.6607666015625, "completions/mean_terminated_length": 582.6607666015625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.6378312706947327, "epoch": 0.4452054794520548, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.0483541121946258, "kl": 0.006408101413398981, "learning_rate": 4.89041095890411e-07, "loss": 0.0044, "num_tokens": 1305309.0, "reward": 0.21940286457538605, "reward_std": 0.04021396487951279, "rewards/check_gptzero_func/mean": 0.21940286457538605, "rewards/check_gptzero_func/std": 0.3583015501499176, "sampling/importance_sampling_ratio/max": 1.6360098123550415, "sampling/importance_sampling_ratio/mean": 1.0002219676971436, "sampling/importance_sampling_ratio/min": 0.6172001957893372, "sampling/sampling_logp_difference/max": 0.492260217666626, "sampling/sampling_logp_difference/mean": 0.022142041474580765, "step": 65 }, { "clip_ratio/high_max": 0.005433306097984314, "clip_ratio/high_mean": 0.0037320181727409363, "clip_ratio/low_mean": 0.002090357942506671, "clip_ratio/low_min": 0.0015090543311089277, "clip_ratio/region_mean": 0.005822376348078251, "entropy": 0.6371781229972839, "epoch": 0.4520547945205479, "grad_norm": 1.0221652556591856, "kl": 0.006535905878990889, "learning_rate": 4.888698630136986e-07, "loss": 0.0021, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 555.8928833007812, "completions/mean_terminated_length": 555.8928833007812, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.6483883261680603, "epoch": 0.4589041095890411, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.0881166362644108, "kl": 0.008647761307656765, "learning_rate": 4.886986301369863e-07, "loss": -0.0056, "num_tokens": 1341399.0, "reward": 0.25955525040626526, "reward_std": 0.07806221395730972, "rewards/check_gptzero_func/mean": 0.25955525040626526, "rewards/check_gptzero_func/std": 0.35352927446365356, "sampling/importance_sampling_ratio/max": 1.602926254272461, "sampling/importance_sampling_ratio/mean": 0.9998186826705933, "sampling/importance_sampling_ratio/min": 0.5358298420906067, "sampling/sampling_logp_difference/max": 0.6239385604858398, "sampling/sampling_logp_difference/mean": 0.022637108340859413, "step": 67 }, { "clip_ratio/high_max": 0.00663074990734458, "clip_ratio/high_mean": 0.0037958617322146893, "clip_ratio/low_mean": 0.0020688942167907953, "clip_ratio/low_min": 0.0012174336006864905, "clip_ratio/region_mean": 0.0058647566474974155, "entropy": 0.6478776931762695, "epoch": 0.4657534246575342, "grad_norm": 1.0432084818500458, "kl": 0.008365168236196041, "learning_rate": 4.88527397260274e-07, "loss": -0.0077, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 705.7678833007812, "completions/mean_terminated_length": 705.7678833007812, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "entropy": 0.6649653315544128, "epoch": 0.4726027397260274, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 0.9743852490549264, "kl": 0.006000231951475143, "learning_rate": 4.883561643835617e-07, "loss": 0.0033, "num_tokens": 1385496.0, "reward": 0.207183375954628, "reward_std": 0.036608025431632996, "rewards/check_gptzero_func/mean": 0.2071833610534668, "rewards/check_gptzero_func/std": 0.33812415599823, "sampling/importance_sampling_ratio/max": 1.601823329925537, "sampling/importance_sampling_ratio/mean": 0.9997014999389648, "sampling/importance_sampling_ratio/min": 0.36031386256217957, "sampling/sampling_logp_difference/max": 1.0207798480987549, "sampling/sampling_logp_difference/mean": 0.02260447107255459, "step": 69 }, { "clip_ratio/high_max": 0.004378621932119131, "clip_ratio/high_mean": 0.0032469704747200012, "clip_ratio/low_mean": 0.001561591518111527, "clip_ratio/low_min": 0.000811688310932368, "clip_ratio/region_mean": 0.004808562342077494, "entropy": 0.6649653315544128, "epoch": 0.4794520547945205, "grad_norm": 0.9360221303581099, "kl": 0.006035739090293646, "learning_rate": 4.881849315068493e-07, "loss": 0.0014, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 603.9464721679688, "completions/mean_terminated_length": 603.9464721679688, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "entropy": 0.6563753485679626, "epoch": 0.4863013698630137, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.02693665099263, "kl": 0.006266315933316946, "learning_rate": 4.88013698630137e-07, "loss": 0.0085, "num_tokens": 1424307.0, "reward": 0.2624198794364929, "reward_std": 0.038884952664375305, "rewards/check_gptzero_func/mean": 0.2624198794364929, "rewards/check_gptzero_func/std": 0.3591133654117584, "sampling/importance_sampling_ratio/max": 1.528120756149292, "sampling/importance_sampling_ratio/mean": 1.000186562538147, "sampling/importance_sampling_ratio/min": 0.6610044836997986, "sampling/sampling_logp_difference/max": 0.4240386486053467, "sampling/sampling_logp_difference/mean": 0.022347530350089073, "step": 71 }, { "clip_ratio/high_max": 0.004208110272884369, "clip_ratio/high_mean": 0.0033236711751669645, "clip_ratio/low_mean": 0.0017943513812497258, "clip_ratio/low_min": 0.0007892659632489085, "clip_ratio/region_mean": 0.005118022672832012, "entropy": 0.6572615504264832, "epoch": 0.4931506849315068, "grad_norm": 0.9906379291644603, "kl": 0.006440569646656513, "learning_rate": 4.878424657534246e-07, "loss": 0.0062, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 675.107177734375, "completions/mean_terminated_length": 675.107177734375, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "entropy": 0.662814736366272, "epoch": 0.5, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9911299480668772, "kl": 0.007091199513524771, "learning_rate": 4.876712328767123e-07, "loss": 0.0019, "num_tokens": 1467045.0, "reward": 0.20337417721748352, "reward_std": 0.08108475059270859, "rewards/check_gptzero_func/mean": 0.20337416231632233, "rewards/check_gptzero_func/std": 0.293454110622406, "sampling/importance_sampling_ratio/max": 1.5829176902770996, "sampling/importance_sampling_ratio/mean": 0.9999324083328247, "sampling/importance_sampling_ratio/min": 0.35348090529441833, "sampling/sampling_logp_difference/max": 1.0399258136749268, "sampling/sampling_logp_difference/mean": 0.022807855159044266, "step": 73 }, { "clip_ratio/high_max": 0.005312367342412472, "clip_ratio/high_mean": 0.0033089013304561377, "clip_ratio/low_mean": 0.0014246442588046193, "clip_ratio/low_min": 0.0006609385600313544, "clip_ratio/region_mean": 0.0047335452400147915, "entropy": 0.6635065674781799, "epoch": 0.5068493150684932, "grad_norm": 0.9558095824798101, "kl": 0.006769455969333649, "learning_rate": 4.875e-07, "loss": -0.0002, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1415.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 733.8214721679688, "completions/mean_terminated_length": 733.8214721679688, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.7195268273353577, "epoch": 0.5136986301369864, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9458739533534983, "kl": 0.005859799217432737, "learning_rate": 4.873287671232876e-07, "loss": 0.0044, "num_tokens": 1512473.0, "reward": 0.13825276494026184, "reward_std": 0.04305782541632652, "rewards/check_gptzero_func/mean": 0.13825276494026184, "rewards/check_gptzero_func/std": 0.2318185418844223, "sampling/importance_sampling_ratio/max": 1.6105501651763916, "sampling/importance_sampling_ratio/mean": 0.9997345209121704, "sampling/importance_sampling_ratio/min": 0.6139755845069885, "sampling/sampling_logp_difference/max": 0.48780012130737305, "sampling/sampling_logp_difference/mean": 0.02372821420431137, "step": 75 }, { "clip_ratio/high_max": 0.005211526528000832, "clip_ratio/high_mean": 0.0037040903698652983, "clip_ratio/low_mean": 0.0014706032816320658, "clip_ratio/low_min": 0.00048088483163155615, "clip_ratio/region_mean": 0.005174693651497364, "entropy": 0.7199868559837341, "epoch": 0.5205479452054794, "grad_norm": 0.9181788088197832, "kl": 0.00616594310849905, "learning_rate": 4.871575342465753e-07, "loss": 0.0024, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 587.1964721679688, "completions/mean_terminated_length": 587.1964721679688, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.5900171399116516, "epoch": 0.5273972602739726, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.129049007171609, "kl": 0.006128288805484772, "learning_rate": 4.86986301369863e-07, "loss": 0.0045, "num_tokens": 1550360.0, "reward": 0.2757817506790161, "reward_std": 0.09551478177309036, "rewards/check_gptzero_func/mean": 0.2757817506790161, "rewards/check_gptzero_func/std": 0.3353048264980316, "sampling/importance_sampling_ratio/max": 1.659281849861145, "sampling/importance_sampling_ratio/mean": 1.000274419784546, "sampling/importance_sampling_ratio/min": 0.5390819907188416, "sampling/sampling_logp_difference/max": 0.6178876161575317, "sampling/sampling_logp_difference/mean": 0.021612754091620445, "step": 77 }, { "clip_ratio/high_max": 0.005658129695802927, "clip_ratio/high_mean": 0.003707077819854021, "clip_ratio/low_mean": 0.0025792084634304047, "clip_ratio/low_min": 0.0009422533330507576, "clip_ratio/region_mean": 0.006286286748945713, "entropy": 0.5887832641601562, "epoch": 0.5342465753424658, "grad_norm": 1.0271862172424027, "kl": 0.006404409650713205, "learning_rate": 4.868150684931507e-07, "loss": 0.002, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 688.3392944335938, "completions/mean_terminated_length": 688.3392944335938, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.6785265207290649, "epoch": 0.541095890410959, "frac_reward_zero_std": 0.0, "grad_norm": 1221.3468258091352, "kl": 9.698434829711914, "learning_rate": 4.866438356164383e-07, "loss": 0.0599, "num_tokens": 1594043.0, "reward": 0.1271028220653534, "reward_std": 0.07035510987043381, "rewards/check_gptzero_func/mean": 0.1271028220653534, "rewards/check_gptzero_func/std": 0.24742399156093597, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0003200769424438, "sampling/importance_sampling_ratio/min": 3.152334102196619e-05, "sampling/sampling_logp_difference/max": 10.364782333374023, "sampling/sampling_logp_difference/mean": 0.02375609613955021, "step": 79 }, { "clip_ratio/high_max": 0.0030534351244568825, "clip_ratio/high_mean": 0.00220930902287364, "clip_ratio/low_mean": 0.0011879971716552973, "clip_ratio/low_min": 0.0005858230870217085, "clip_ratio/region_mean": 0.0033973061945289373, "entropy": 0.6781052947044373, "epoch": 0.547945205479452, "grad_norm": 19.600973012991325, "kl": 1.4914833307266235, "learning_rate": 4.86472602739726e-07, "loss": 0.011, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 719.4642944335938, "completions/mean_terminated_length": 719.4642944335938, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.7031124830245972, "epoch": 0.5547945205479452, "frac_reward_zero_std": 0.0, "grad_norm": 0.9805246160630438, "kl": 0.006405664142221212, "learning_rate": 4.863013698630137e-07, "loss": -0.0115, "num_tokens": 1639431.0, "reward": 0.11926589906215668, "reward_std": 0.045857932418584824, "rewards/check_gptzero_func/mean": 0.11926589906215668, "rewards/check_gptzero_func/std": 0.2415137141942978, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000449538230896, "sampling/importance_sampling_ratio/min": 0.4767449200153351, "sampling/sampling_logp_difference/max": 0.7407736778259277, "sampling/sampling_logp_difference/mean": 0.023195646703243256, "step": 81 }, { "clip_ratio/high_max": 0.003914660308510065, "clip_ratio/high_mean": 0.0028178843203932047, "clip_ratio/low_mean": 0.0012973027769476175, "clip_ratio/low_min": 0.0008210180676542222, "clip_ratio/region_mean": 0.004115187097340822, "entropy": 0.7021611928939819, "epoch": 0.5616438356164384, "grad_norm": 0.9360544679271781, "kl": 0.006309481803327799, "learning_rate": 4.861301369863013e-07, "loss": -0.0136, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2436.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 751.1250610351562, "completions/mean_terminated_length": 751.1250610351562, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "entropy": 0.6657801866531372, "epoch": 0.5684931506849316, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9489544862301276, "kl": 0.0076403990387916565, "learning_rate": 4.85958904109589e-07, "loss": -0.0032, "num_tokens": 1687260.0, "reward": 0.15187732875347137, "reward_std": 0.07166766375303268, "rewards/check_gptzero_func/mean": 0.15187731385231018, "rewards/check_gptzero_func/std": 0.2912762463092804, "sampling/importance_sampling_ratio/max": 1.592954397201538, "sampling/importance_sampling_ratio/mean": 0.9998466372489929, "sampling/importance_sampling_ratio/min": 0.609511137008667, "sampling/sampling_logp_difference/max": 0.4950981140136719, "sampling/sampling_logp_difference/mean": 0.022485269233584404, "step": 83 }, { "clip_ratio/high_max": 0.004973749630153179, "clip_ratio/high_mean": 0.0036566380877047777, "clip_ratio/low_mean": 0.0013774337712675333, "clip_ratio/low_min": 0.0009476106497459114, "clip_ratio/region_mean": 0.005034071393311024, "entropy": 0.6661725640296936, "epoch": 0.5753424657534246, "grad_norm": 0.9101679329711533, "kl": 0.008020752109587193, "learning_rate": 4.857876712328767e-07, "loss": -0.0054, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1587.0, "completions/max_terminated_length": 1587.0, "completions/mean_length": 527.4464721679688, "completions/mean_terminated_length": 527.4464721679688, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.6236804127693176, "epoch": 0.5821917808219178, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.2265373623451183, "kl": 0.007543736137449741, "learning_rate": 4.856164383561643e-07, "loss": -0.0053, "num_tokens": 1721429.0, "reward": 0.2906836271286011, "reward_std": 0.07395296543836594, "rewards/check_gptzero_func/mean": 0.2906835973262787, "rewards/check_gptzero_func/std": 0.38238611817359924, "sampling/importance_sampling_ratio/max": 1.7278878688812256, "sampling/importance_sampling_ratio/mean": 1.0000780820846558, "sampling/importance_sampling_ratio/min": 0.607107937335968, "sampling/sampling_logp_difference/max": 0.5468997955322266, "sampling/sampling_logp_difference/mean": 0.021995307877659798, "step": 85 }, { "clip_ratio/high_max": 0.005591219756752253, "clip_ratio/high_mean": 0.003488959511741996, "clip_ratio/low_mean": 0.00213831989094615, "clip_ratio/low_min": 0.0009578543831594288, "clip_ratio/region_mean": 0.005627279169857502, "entropy": 0.6233993172645569, "epoch": 0.589041095890411, "grad_norm": 387205.70179431926, "kl": 2497.133056640625, "learning_rate": 4.85445205479452e-07, "loss": 28.5591, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1534.0, "completions/max_terminated_length": 1534.0, "completions/mean_length": 715.6250610351562, "completions/mean_terminated_length": 715.6250610351562, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.6891322731971741, "epoch": 0.5958904109589042, "frac_reward_zero_std": 0.0, "grad_norm": 1.019943832833117, "kl": 0.006356666795909405, "learning_rate": 4.852739726027397e-07, "loss": -0.0083, "num_tokens": 1766514.0, "reward": 0.21964427828788757, "reward_std": 0.06310787051916122, "rewards/check_gptzero_func/mean": 0.21964427828788757, "rewards/check_gptzero_func/std": 0.3060833811759949, "sampling/importance_sampling_ratio/max": 1.6570916175842285, "sampling/importance_sampling_ratio/mean": 0.9994074702262878, "sampling/importance_sampling_ratio/min": 0.5129395127296448, "sampling/sampling_logp_difference/max": 0.6675972938537598, "sampling/sampling_logp_difference/mean": 0.02291073650121689, "step": 87 }, { "clip_ratio/high_max": 0.005698529537767172, "clip_ratio/high_mean": 0.0036629175301641226, "clip_ratio/low_mean": 0.0012402825523167849, "clip_ratio/low_min": 0.0009522297768853605, "clip_ratio/region_mean": 0.0049032000824809074, "entropy": 0.6905957460403442, "epoch": 0.6027397260273972, "grad_norm": 0.9741746846351601, "kl": 0.00650844955816865, "learning_rate": 4.851027397260274e-07, "loss": -0.0106, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2085.0, "completions/max_terminated_length": 2085.0, "completions/mean_length": 501.9107360839844, "completions/mean_terminated_length": 501.9107360839844, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.5722449421882629, "epoch": 0.6095890410958904, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.2685605860778817, "kl": 0.008846515789628029, "learning_rate": 4.84931506849315e-07, "loss": 0.004, "num_tokens": 1799597.0, "reward": 0.34518441557884216, "reward_std": 0.08718346804380417, "rewards/check_gptzero_func/mean": 0.3451843857765198, "rewards/check_gptzero_func/std": 0.3938176929950714, "sampling/importance_sampling_ratio/max": 1.6018961668014526, "sampling/importance_sampling_ratio/mean": 1.0000609159469604, "sampling/importance_sampling_ratio/min": 0.47708365321159363, "sampling/sampling_logp_difference/max": 0.7400634288787842, "sampling/sampling_logp_difference/mean": 0.020684462040662766, "step": 89 }, { "clip_ratio/high_max": 0.0047095762565732, "clip_ratio/high_mean": 0.0031285008881241083, "clip_ratio/low_mean": 0.002352690091356635, "clip_ratio/low_min": 0.0014636525884270668, "clip_ratio/region_mean": 0.005481190979480743, "entropy": 0.5719417929649353, "epoch": 0.6164383561643836, "grad_norm": 1.1468988800638034, "kl": 0.008698915131390095, "learning_rate": 4.847602739726027e-07, "loss": 0.0015, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1064.0, "completions/max_terminated_length": 1064.0, "completions/mean_length": 473.732177734375, "completions/mean_terminated_length": 473.732177734375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.652260422706604, "epoch": 0.6232876712328768, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2145527137597258, "kl": 0.011849612928926945, "learning_rate": 4.845890410958904e-07, "loss": -0.0085, "num_tokens": 1830616.0, "reward": 0.33061403036117554, "reward_std": 0.028385214507579803, "rewards/check_gptzero_func/mean": 0.33061403036117554, "rewards/check_gptzero_func/std": 0.4207037389278412, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9999002814292908, "sampling/importance_sampling_ratio/min": 0.597367525100708, "sampling/sampling_logp_difference/max": 0.8979051113128662, "sampling/sampling_logp_difference/mean": 0.022179046645760536, "step": 91 }, { "clip_ratio/high_max": 0.0052154818549752235, "clip_ratio/high_mean": 0.0039109704084694386, "clip_ratio/low_mean": 0.0017917242366820574, "clip_ratio/low_min": 0.00046274872147478163, "clip_ratio/region_mean": 0.005702694412320852, "entropy": 0.6527892351150513, "epoch": 0.6301369863013698, "grad_norm": 1.0723060877053083, "kl": 0.012745736166834831, "learning_rate": 4.84417808219178e-07, "loss": -0.0115, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 528.1607666015625, "completions/mean_terminated_length": 528.1607666015625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.6045703887939453, "epoch": 0.636986301369863, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.0848697909780816, "kl": 0.00996867660433054, "learning_rate": 4.842465753424657e-07, "loss": -0.0013, "num_tokens": 1865089.0, "reward": 0.3733298182487488, "reward_std": 0.05819503590464592, "rewards/check_gptzero_func/mean": 0.3733297884464264, "rewards/check_gptzero_func/std": 0.40319404006004333, "sampling/importance_sampling_ratio/max": 1.7593635320663452, "sampling/importance_sampling_ratio/mean": 0.9997335076332092, "sampling/importance_sampling_ratio/min": 0.6210815906524658, "sampling/sampling_logp_difference/max": 0.5649521350860596, "sampling/sampling_logp_difference/mean": 0.022076386958360672, "step": 93 }, { "clip_ratio/high_max": 0.005675146821886301, "clip_ratio/high_mean": 0.0040925429202616215, "clip_ratio/low_mean": 0.0022936868481338024, "clip_ratio/low_min": 0.0009057971183210611, "clip_ratio/region_mean": 0.006386229302734137, "entropy": 0.6038605570793152, "epoch": 0.6438356164383562, "grad_norm": 1.0338016114209552, "kl": 0.010059396736323833, "learning_rate": 4.840753424657534e-07, "loss": -0.0043, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2178.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 803.482177734375, "completions/mean_terminated_length": 803.482177734375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.7016928791999817, "epoch": 0.6506849315068494, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9122734884445866, "kl": 0.00785041879862547, "learning_rate": 4.839041095890411e-07, "loss": 0.0011, "num_tokens": 1914944.0, "reward": 0.19917568564414978, "reward_std": 0.0815139189362526, "rewards/check_gptzero_func/mean": 0.1991756707429886, "rewards/check_gptzero_func/std": 0.29049330949783325, "sampling/importance_sampling_ratio/max": 1.9037479162216187, "sampling/importance_sampling_ratio/mean": 1.000402808189392, "sampling/importance_sampling_ratio/min": 0.6302492618560791, "sampling/sampling_logp_difference/max": 0.643824577331543, "sampling/sampling_logp_difference/mean": 0.023212246596813202, "step": 95 }, { "clip_ratio/high_max": 0.0038794775027781725, "clip_ratio/high_mean": 0.0029119860846549273, "clip_ratio/low_mean": 0.0014363705413416028, "clip_ratio/low_min": 0.0006617038743570447, "clip_ratio/region_mean": 0.004348356742411852, "entropy": 0.702284038066864, "epoch": 0.6575342465753424, "grad_norm": 0.8764740126734416, "kl": 0.008375348523259163, "learning_rate": 4.837328767123287e-07, "loss": -0.0012, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1211.0, "completions/max_terminated_length": 1211.0, "completions/mean_length": 541.9285888671875, "completions/mean_terminated_length": 541.9285888671875, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.6563766598701477, "epoch": 0.6643835616438356, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.150932447046338, "kl": 0.011839187704026699, "learning_rate": 4.835616438356164e-07, "loss": 0.0017, "num_tokens": 1950688.0, "reward": 0.2847343683242798, "reward_std": 0.05218537896871567, "rewards/check_gptzero_func/mean": 0.2847343683242798, "rewards/check_gptzero_func/std": 0.3633427917957306, "sampling/importance_sampling_ratio/max": 1.6625174283981323, "sampling/importance_sampling_ratio/mean": 1.000138759613037, "sampling/importance_sampling_ratio/min": 0.5748390555381775, "sampling/sampling_logp_difference/max": 0.5536651611328125, "sampling/sampling_logp_difference/mean": 0.0220990888774395, "step": 97 }, { "clip_ratio/high_max": 0.004444444552063942, "clip_ratio/high_mean": 0.003067860146984458, "clip_ratio/low_mean": 0.0018514657858759165, "clip_ratio/low_min": 0.0013481631176546216, "clip_ratio/region_mean": 0.004919325467199087, "entropy": 0.65687495470047, "epoch": 0.6712328767123288, "grad_norm": 1.159553681219553, "kl": 0.011413590982556343, "learning_rate": 4.833904109589041e-07, "loss": -0.0014, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1711.0, "completions/max_terminated_length": 1711.0, "completions/mean_length": 637.0, "completions/mean_terminated_length": 637.0, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.6277164816856384, "epoch": 0.678082191780822, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.0980919226965784, "kl": 0.009950196370482445, "learning_rate": 4.832191780821917e-07, "loss": 0.014, "num_tokens": 1991262.0, "reward": 0.26640284061431885, "reward_std": 0.05745915696024895, "rewards/check_gptzero_func/mean": 0.26640281081199646, "rewards/check_gptzero_func/std": 0.35439813137054443, "sampling/importance_sampling_ratio/max": 1.619573712348938, "sampling/importance_sampling_ratio/mean": 0.9994333982467651, "sampling/importance_sampling_ratio/min": 0.46717071533203125, "sampling/sampling_logp_difference/max": 0.7610604763031006, "sampling/sampling_logp_difference/mean": 0.021274743601679802, "step": 99 }, { "clip_ratio/high_max": 0.0051124743185937405, "clip_ratio/high_mean": 0.0033778210636228323, "clip_ratio/low_mean": 0.0015478351851925254, "clip_ratio/low_min": 0.0008274720748886466, "clip_ratio/region_mean": 0.004925656598061323, "entropy": 0.6279528737068176, "epoch": 0.684931506849315, "grad_norm": 0.9658837717505014, "kl": 0.009790549986064434, "learning_rate": 4.830479452054794e-07, "loss": 0.0115, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1715.0, "completions/max_terminated_length": 1715.0, "completions/mean_length": 788.8214721679688, "completions/mean_terminated_length": 788.8214721679688, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "entropy": 0.6602219939231873, "epoch": 0.6917808219178082, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 0.9265780679155551, "kl": 0.009187743067741394, "learning_rate": 4.828767123287671e-07, "loss": 0.0059, "num_tokens": 2040562.0, "reward": 0.20028144121170044, "reward_std": 0.06671323627233505, "rewards/check_gptzero_func/mean": 0.20028142631053925, "rewards/check_gptzero_func/std": 0.3089793026447296, "sampling/importance_sampling_ratio/max": 1.6702730655670166, "sampling/importance_sampling_ratio/mean": 0.999923825263977, "sampling/importance_sampling_ratio/min": 0.5325469374656677, "sampling/sampling_logp_difference/max": 0.6300842761993408, "sampling/sampling_logp_difference/mean": 0.02237076871097088, "step": 101 }, { "clip_ratio/high_max": 0.0042105261236429214, "clip_ratio/high_mean": 0.0029237275011837482, "clip_ratio/low_mean": 0.001647714525461197, "clip_ratio/low_min": 0.0008821259252727032, "clip_ratio/region_mean": 0.004571442026644945, "entropy": 0.6598459482192993, "epoch": 0.6986301369863014, "grad_norm": 0.8850896112625194, "kl": 0.009258232079446316, "learning_rate": 4.827054794520547e-07, "loss": 0.0034, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1633.0, "completions/max_terminated_length": 1633.0, "completions/mean_length": 698.2142944335938, "completions/mean_terminated_length": 698.2142944335938, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "entropy": 0.7219709157943726, "epoch": 0.7054794520547946, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9957607167552062, "kl": 0.01226342748850584, "learning_rate": 4.825342465753424e-07, "loss": -0.0232, "num_tokens": 2084538.0, "reward": 0.20832635462284088, "reward_std": 0.07641115039587021, "rewards/check_gptzero_func/mean": 0.2083263397216797, "rewards/check_gptzero_func/std": 0.30046403408050537, "sampling/importance_sampling_ratio/max": 1.9441617727279663, "sampling/importance_sampling_ratio/mean": 1.000141978263855, "sampling/importance_sampling_ratio/min": 0.44864416122436523, "sampling/sampling_logp_difference/max": 0.8015252351760864, "sampling/sampling_logp_difference/mean": 0.022955315187573433, "step": 103 }, { "clip_ratio/high_max": 0.004181601107120514, "clip_ratio/high_mean": 0.002969307592138648, "clip_ratio/low_mean": 0.001665961928665638, "clip_ratio/low_min": 0.0007436326704919338, "clip_ratio/region_mean": 0.00463526975363493, "entropy": 0.7224189639091492, "epoch": 0.7123287671232876, "grad_norm": 0.9479313892738572, "kl": 0.011490896344184875, "learning_rate": 4.823630136986301e-07, "loss": -0.026, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1774.0, "completions/max_terminated_length": 1774.0, "completions/mean_length": 715.3035888671875, "completions/mean_terminated_length": 715.3035888671875, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.6754369139671326, "epoch": 0.7191780821917808, "frac_reward_zero_std": 0.0, "grad_norm": 1.020600780254282, "kl": 0.01029200665652752, "learning_rate": 4.821917808219178e-07, "loss": -0.015, "num_tokens": 2129341.0, "reward": 0.2525421380996704, "reward_std": 0.09761429578065872, "rewards/check_gptzero_func/mean": 0.252542108297348, "rewards/check_gptzero_func/std": 0.34821757674217224, "sampling/importance_sampling_ratio/max": 1.6240513324737549, "sampling/importance_sampling_ratio/mean": 0.9998899102210999, "sampling/importance_sampling_ratio/min": 0.30086326599121094, "sampling/sampling_logp_difference/max": 1.2010993957519531, "sampling/sampling_logp_difference/mean": 0.02282712794840336, "step": 105 }, { "clip_ratio/high_max": 0.004547751508653164, "clip_ratio/high_mean": 0.0036340313963592052, "clip_ratio/low_mean": 0.002035415731370449, "clip_ratio/low_min": 0.0012632642174139619, "clip_ratio/region_mean": 0.005669447127729654, "entropy": 0.6747764348983765, "epoch": 0.726027397260274, "grad_norm": 0.9489439245524416, "kl": 0.012483260594308376, "learning_rate": 4.820205479452054e-07, "loss": -0.0178, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 693.357177734375, "completions/mean_terminated_length": 693.357177734375, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "entropy": 0.670443594455719, "epoch": 0.7328767123287672, "frac_reward_zero_std": 0.0, "grad_norm": 1.1152934394277505, "kl": 0.009298834018409252, "learning_rate": 4.818493150684931e-07, "loss": -0.0232, "num_tokens": 2173113.0, "reward": 0.15059486031532288, "reward_std": 0.06375542283058167, "rewards/check_gptzero_func/mean": 0.15059484541416168, "rewards/check_gptzero_func/std": 0.23402854800224304, "sampling/importance_sampling_ratio/max": 1.9157118797302246, "sampling/importance_sampling_ratio/mean": 1.0003081560134888, "sampling/importance_sampling_ratio/min": 0.6129069328308105, "sampling/sampling_logp_difference/max": 0.6500892639160156, "sampling/sampling_logp_difference/mean": 0.023337258026003838, "step": 107 }, { "clip_ratio/high_max": 0.005046072881668806, "clip_ratio/high_mean": 0.0038157349918037653, "clip_ratio/low_mean": 0.0020433817990124226, "clip_ratio/low_min": 0.001658099819906056, "clip_ratio/region_mean": 0.0058591170236468315, "entropy": 0.6697315573692322, "epoch": 0.7397260273972602, "grad_norm": 0.9803882642324947, "kl": 0.00958580244332552, "learning_rate": 4.816780821917808e-07, "loss": -0.0258, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2543.0, "completions/max_terminated_length": 2543.0, "completions/mean_length": 701.9464721679688, "completions/mean_terminated_length": 701.9464721679688, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.7455806732177734, "epoch": 0.7465753424657534, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.0333634086858539, "kl": 0.010200954042375088, "learning_rate": 4.815068493150684e-07, "loss": 0.0031, "num_tokens": 2216672.0, "reward": 0.2236851006746292, "reward_std": 0.06687161326408386, "rewards/check_gptzero_func/mean": 0.2236851006746292, "rewards/check_gptzero_func/std": 0.3516465425491333, "sampling/importance_sampling_ratio/max": 1.692874789237976, "sampling/importance_sampling_ratio/mean": 0.9997504353523254, "sampling/importance_sampling_ratio/min": 0.609056830406189, "sampling/sampling_logp_difference/max": 0.5264281034469604, "sampling/sampling_logp_difference/mean": 0.022957684472203255, "step": 109 }, { "clip_ratio/high_max": 0.004788507707417011, "clip_ratio/high_mean": 0.003466331632807851, "clip_ratio/low_mean": 0.0016419206513091922, "clip_ratio/low_min": 0.0009821574203670025, "clip_ratio/region_mean": 0.005108252167701721, "entropy": 0.7455806732177734, "epoch": 0.7534246575342466, "grad_norm": 0.9589304433589255, "kl": 0.01028840709477663, "learning_rate": 4.813356164383561e-07, "loss": 0.0003, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1113.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 547.8928833007812, "completions/mean_terminated_length": 547.8928833007812, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.6060380339622498, "epoch": 0.7602739726027398, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.1781076996798807, "kl": 0.021233191713690758, "learning_rate": 4.811643835616437e-07, "loss": -0.0038, "num_tokens": 2251990.0, "reward": 0.28087183833122253, "reward_std": 0.059800419956445694, "rewards/check_gptzero_func/mean": 0.28087183833122253, "rewards/check_gptzero_func/std": 0.36292240023612976, "sampling/importance_sampling_ratio/max": 1.839231014251709, "sampling/importance_sampling_ratio/mean": 1.0000498294830322, "sampling/importance_sampling_ratio/min": 0.15502624213695526, "sampling/sampling_logp_difference/max": 1.8641608953475952, "sampling/sampling_logp_difference/mean": 0.02144014462828636, "step": 111 }, { "clip_ratio/high_max": 0.005837711505591869, "clip_ratio/high_mean": 0.0037248102016747, "clip_ratio/low_mean": 0.0020952816121280193, "clip_ratio/low_min": 0.0011231747921556234, "clip_ratio/region_mean": 0.005820091813802719, "entropy": 0.6044379472732544, "epoch": 0.7671232876712328, "grad_norm": 1.3558863743348315, "kl": 0.029866304248571396, "learning_rate": 4.809931506849314e-07, "loss": -0.0066, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2114.0, "completions/max_terminated_length": 2114.0, "completions/mean_length": 518.7857666015625, "completions/mean_terminated_length": 518.7857666015625, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.6356129050254822, "epoch": 0.773972602739726, "frac_reward_zero_std": 0.0, "grad_norm": 1.1840909436161464, "kl": 0.01741221360862255, "learning_rate": 4.808219178082192e-07, "loss": 0.0024, "num_tokens": 2286378.0, "reward": 0.3202606737613678, "reward_std": 0.0886005237698555, "rewards/check_gptzero_func/mean": 0.3202606737613678, "rewards/check_gptzero_func/std": 0.38394448161125183, "sampling/importance_sampling_ratio/max": 1.6943600177764893, "sampling/importance_sampling_ratio/mean": 1.0000622272491455, "sampling/importance_sampling_ratio/min": 0.05413072928786278, "sampling/sampling_logp_difference/max": 2.916353225708008, "sampling/sampling_logp_difference/mean": 0.022059014067053795, "step": 113 }, { "clip_ratio/high_max": 0.006589147262275219, "clip_ratio/high_mean": 0.004085692577064037, "clip_ratio/low_mean": 0.002172979759052396, "clip_ratio/low_min": 0.0009454774553887546, "clip_ratio/region_mean": 0.006258672568947077, "entropy": 0.6356129050254822, "epoch": 0.7808219178082192, "grad_norm": 1.12590343313897, "kl": 0.01756015419960022, "learning_rate": 4.806506849315069e-07, "loss": -0.0007, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1239.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 589.0714721679688, "completions/mean_terminated_length": 589.0714721679688, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.6398310661315918, "epoch": 0.7876712328767124, "frac_reward_zero_std": 0.0, "grad_norm": 1.0569212586500332, "kl": 0.017804598435759544, "learning_rate": 4.804794520547946e-07, "loss": -0.0018, "num_tokens": 2324406.0, "reward": 0.18648380041122437, "reward_std": 0.07314244657754898, "rewards/check_gptzero_func/mean": 0.18648378551006317, "rewards/check_gptzero_func/std": 0.30726054310798645, "sampling/importance_sampling_ratio/max": 1.5017480850219727, "sampling/importance_sampling_ratio/mean": 1.0003035068511963, "sampling/importance_sampling_ratio/min": 0.6005937457084656, "sampling/sampling_logp_difference/max": 0.5098365545272827, "sampling/sampling_logp_difference/mean": 0.022141380235552788, "step": 115 }, { "clip_ratio/high_max": 0.006483999080955982, "clip_ratio/high_mean": 0.003971724770963192, "clip_ratio/low_mean": 0.0017779384506866336, "clip_ratio/low_min": 0.0008393486496061087, "clip_ratio/region_mean": 0.00574966287240386, "entropy": 0.6422339677810669, "epoch": 0.7945205479452054, "grad_norm": 1.009751077830725, "kl": 0.017738020047545433, "learning_rate": 4.803082191780822e-07, "loss": -0.0047, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 530.4107666015625, "completions/mean_terminated_length": 530.4107666015625, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.6368780136108398, "epoch": 0.8013698630136986, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2662325019431482, "kl": 0.017839383333921432, "learning_rate": 4.801369863013699e-07, "loss": 0.006, "num_tokens": 2359069.0, "reward": 0.38407787680625916, "reward_std": 0.09048590064048767, "rewards/check_gptzero_func/mean": 0.38407784700393677, "rewards/check_gptzero_func/std": 0.37413352727890015, "sampling/importance_sampling_ratio/max": 1.8876349925994873, "sampling/importance_sampling_ratio/mean": 0.9998966455459595, "sampling/importance_sampling_ratio/min": 0.6262783408164978, "sampling/sampling_logp_difference/max": 0.6353247165679932, "sampling/sampling_logp_difference/mean": 0.02227889373898506, "step": 117 }, { "clip_ratio/high_max": 0.005237887613475323, "clip_ratio/high_mean": 0.003917110152542591, "clip_ratio/low_mean": 0.002491515129804611, "clip_ratio/low_min": 0.0012420156272128224, "clip_ratio/region_mean": 0.006408625282347202, "entropy": 0.6374130845069885, "epoch": 0.8082191780821918, "grad_norm": 1.6348020337674711, "kl": 0.01796843111515045, "learning_rate": 4.799657534246575e-07, "loss": 0.0025, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 574.0535888671875, "completions/mean_terminated_length": 574.0535888671875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.6538468599319458, "epoch": 0.815068493150685, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0949296512283921, "kl": 0.020418450236320496, "learning_rate": 4.797945205479452e-07, "loss": -0.0101, "num_tokens": 2396096.0, "reward": 0.2958105504512787, "reward_std": 0.07148084789514542, "rewards/check_gptzero_func/mean": 0.2958105206489563, "rewards/check_gptzero_func/std": 0.3250974714756012, "sampling/importance_sampling_ratio/max": 1.9100761413574219, "sampling/importance_sampling_ratio/mean": 1.0003507137298584, "sampling/importance_sampling_ratio/min": 0.4057960510253906, "sampling/sampling_logp_difference/max": 0.9019045829772949, "sampling/sampling_logp_difference/mean": 0.02247105911374092, "step": 119 }, { "clip_ratio/high_max": 0.005473998375236988, "clip_ratio/high_mean": 0.00381966563872993, "clip_ratio/low_mean": 0.00223062583245337, "clip_ratio/low_min": 0.00028169015422463417, "clip_ratio/region_mean": 0.006050291936844587, "entropy": 0.6550874710083008, "epoch": 0.821917808219178, "grad_norm": 1.026426756246321, "kl": 0.018990013748407364, "learning_rate": 4.796232876712329e-07, "loss": -0.0131, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1544.0, "completions/max_terminated_length": 1544.0, "completions/mean_length": 519.357177734375, "completions/mean_terminated_length": 519.357177734375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.6270474791526794, "epoch": 0.8287671232876712, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.1924482066364808, "kl": 0.020098445937037468, "learning_rate": 4.794520547945205e-07, "loss": -0.0043, "num_tokens": 2430260.0, "reward": 0.29646849632263184, "reward_std": 0.11449331045150757, "rewards/check_gptzero_func/mean": 0.2964685261249542, "rewards/check_gptzero_func/std": 0.3537934422492981, "sampling/importance_sampling_ratio/max": 1.6088849306106567, "sampling/importance_sampling_ratio/mean": 1.0000154972076416, "sampling/importance_sampling_ratio/min": 0.6054292321205139, "sampling/sampling_logp_difference/max": 0.5018175840377808, "sampling/sampling_logp_difference/mean": 0.021678121760487556, "step": 121 }, { "clip_ratio/high_max": 0.005964214913547039, "clip_ratio/high_mean": 0.0038154483772814274, "clip_ratio/low_mean": 0.0022718566469848156, "clip_ratio/low_min": 0.0009573958814144135, "clip_ratio/region_mean": 0.00608730548992753, "entropy": 0.6270474791526794, "epoch": 0.8356164383561644, "grad_norm": 1.1318387108441823, "kl": 0.019673576578497887, "learning_rate": 4.792808219178083e-07, "loss": -0.0078, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 501.0714416503906, "completions/mean_terminated_length": 501.0714416503906, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "entropy": 0.6437985301017761, "epoch": 0.8424657534246576, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.252968908931099, "kl": 0.020104888826608658, "learning_rate": 4.791095890410959e-07, "loss": -0.0025, "num_tokens": 2463240.0, "reward": 0.28241100907325745, "reward_std": 0.027187027037143707, "rewards/check_gptzero_func/mean": 0.28241100907325745, "rewards/check_gptzero_func/std": 0.3761792778968811, "sampling/importance_sampling_ratio/max": 1.4950871467590332, "sampling/importance_sampling_ratio/mean": 0.9999162554740906, "sampling/importance_sampling_ratio/min": 0.5834659337997437, "sampling/sampling_logp_difference/max": 0.5387692451477051, "sampling/sampling_logp_difference/mean": 0.021958641707897186, "step": 123 }, { "clip_ratio/high_max": 0.005639913026243448, "clip_ratio/high_mean": 0.004623250104486942, "clip_ratio/low_mean": 0.002582652261480689, "clip_ratio/low_min": 0.0006382978754118085, "clip_ratio/region_mean": 0.007205902598798275, "entropy": 0.6434376835823059, "epoch": 0.8493150684931506, "grad_norm": 1.157546479038916, "kl": 0.020832495763897896, "learning_rate": 4.789383561643836e-07, "loss": -0.0058, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1832.0, "completions/max_terminated_length": 1832.0, "completions/mean_length": 749.0535888671875, "completions/mean_terminated_length": 749.0535888671875, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.6664509177207947, "epoch": 0.8561643835616438, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 0.9558013166043401, "kl": 0.015045592561364174, "learning_rate": 4.787671232876712e-07, "loss": 0.0007, "num_tokens": 2509821.0, "reward": 0.15890823304653168, "reward_std": 0.06779876351356506, "rewards/check_gptzero_func/mean": 0.15890823304653168, "rewards/check_gptzero_func/std": 0.21635758876800537, "sampling/importance_sampling_ratio/max": 1.5823919773101807, "sampling/importance_sampling_ratio/mean": 1.0003548860549927, "sampling/importance_sampling_ratio/min": 0.6673845052719116, "sampling/sampling_logp_difference/max": 0.4589376449584961, "sampling/sampling_logp_difference/mean": 0.022839896380901337, "step": 125 }, { "clip_ratio/high_max": 0.004532700404524803, "clip_ratio/high_mean": 0.0035000804346054792, "clip_ratio/low_mean": 0.001859834068454802, "clip_ratio/low_min": 0.00119331746827811, "clip_ratio/region_mean": 0.005359914619475603, "entropy": 0.6672226786613464, "epoch": 0.863013698630137, "grad_norm": 0.9088167632833702, "kl": 0.01474203635007143, "learning_rate": 4.785958904109589e-07, "loss": -0.0019, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1771.0, "completions/max_terminated_length": 1771.0, "completions/mean_length": 655.857177734375, "completions/mean_terminated_length": 655.857177734375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.6300221085548401, "epoch": 0.8698630136986302, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.045101265583832, "kl": 0.017885003238916397, "learning_rate": 4.784246575342466e-07, "loss": -0.0015, "num_tokens": 2551823.0, "reward": 0.20975130796432495, "reward_std": 0.0628761500120163, "rewards/check_gptzero_func/mean": 0.20975129306316376, "rewards/check_gptzero_func/std": 0.3136442005634308, "sampling/importance_sampling_ratio/max": 1.6056817770004272, "sampling/importance_sampling_ratio/mean": 0.9999440312385559, "sampling/importance_sampling_ratio/min": 0.03236517682671547, "sampling/sampling_logp_difference/max": 3.4306721687316895, "sampling/sampling_logp_difference/mean": 0.02215009741485119, "step": 127 }, { "clip_ratio/high_max": 0.005244231317192316, "clip_ratio/high_mean": 0.004208513535559177, "clip_ratio/low_mean": 0.0022189507726579905, "clip_ratio/low_min": 0.001142041408456862, "clip_ratio/region_mean": 0.0064274645410478115, "entropy": 0.6300221085548401, "epoch": 0.8767123287671232, "grad_norm": 0.9884435768784138, "kl": 0.017853517085313797, "learning_rate": 4.782534246575342e-07, "loss": -0.0045, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 700.6607666015625, "completions/mean_terminated_length": 700.6607666015625, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "entropy": 0.674447238445282, "epoch": 0.8835616438356164, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0523348075215304, "kl": 0.016234155744314194, "learning_rate": 4.780821917808219e-07, "loss": -0.0016, "num_tokens": 2596708.0, "reward": 0.28019827604293823, "reward_std": 0.12074566632509232, "rewards/check_gptzero_func/mean": 0.28019824624061584, "rewards/check_gptzero_func/std": 0.353007435798645, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0001286268234253, "sampling/importance_sampling_ratio/min": 0.483479768037796, "sampling/sampling_logp_difference/max": 0.9101448059082031, "sampling/sampling_logp_difference/mean": 0.02353024110198021, "step": 129 }, { "clip_ratio/high_max": 0.005905511789023876, "clip_ratio/high_mean": 0.0038669572677463293, "clip_ratio/low_mean": 0.0020831283181905746, "clip_ratio/low_min": 0.001319067901931703, "clip_ratio/region_mean": 0.005950085818767548, "entropy": 0.674447238445282, "epoch": 0.8904109589041096, "grad_norm": 0.9755712727026206, "kl": 0.016480909660458565, "learning_rate": 4.779109589041096e-07, "loss": -0.0045, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1779.0, "completions/max_terminated_length": 1779.0, "completions/mean_length": 578.5714721679688, "completions/mean_terminated_length": 578.5714721679688, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.6350026726722717, "epoch": 0.8972602739726028, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.1019381360247407, "kl": 0.02301154099404812, "learning_rate": 4.777397260273973e-07, "loss": 0.0033, "num_tokens": 2633776.0, "reward": 0.368809312582016, "reward_std": 0.07138953357934952, "rewards/check_gptzero_func/mean": 0.3688092827796936, "rewards/check_gptzero_func/std": 0.41689297556877136, "sampling/importance_sampling_ratio/max": 1.6107064485549927, "sampling/importance_sampling_ratio/mean": 1.0001517534255981, "sampling/importance_sampling_ratio/min": 0.568545937538147, "sampling/sampling_logp_difference/max": 0.5646731853485107, "sampling/sampling_logp_difference/mean": 0.023081686347723007, "step": 131 }, { "clip_ratio/high_max": 0.006127138156443834, "clip_ratio/high_mean": 0.004557103384286165, "clip_ratio/low_mean": 0.0024317686911672354, "clip_ratio/low_min": 0.0008438818622380495, "clip_ratio/region_mean": 0.006988872308284044, "entropy": 0.6350026726722717, "epoch": 0.9041095890410958, "grad_norm": 1.0691366533172744, "kl": 0.023794203996658325, "learning_rate": 4.775684931506849e-07, "loss": -0.0003, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1297.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 604.3392944335938, "completions/mean_terminated_length": 604.3392944335938, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.6247599720954895, "epoch": 0.910958904109589, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.0370125036357276, "kl": 0.018155112862586975, "learning_rate": 4.773972602739726e-07, "loss": -0.0055, "num_tokens": 2672275.0, "reward": 0.2417442500591278, "reward_std": 0.05235552415251732, "rewards/check_gptzero_func/mean": 0.2417442500591278, "rewards/check_gptzero_func/std": 0.3496074676513672, "sampling/importance_sampling_ratio/max": 1.636925458908081, "sampling/importance_sampling_ratio/mean": 0.9999204277992249, "sampling/importance_sampling_ratio/min": 0.6171947717666626, "sampling/sampling_logp_difference/max": 0.49281978607177734, "sampling/sampling_logp_difference/mean": 0.022497620433568954, "step": 133 }, { "clip_ratio/high_max": 0.005678480491042137, "clip_ratio/high_mean": 0.00409776670858264, "clip_ratio/low_mean": 0.002209337428212166, "clip_ratio/low_min": 0.0010752688394859433, "clip_ratio/region_mean": 0.0063071041367948055, "entropy": 0.6234744787216187, "epoch": 0.9178082191780822, "grad_norm": 0.9857044179421152, "kl": 0.018680959939956665, "learning_rate": 4.772260273972603e-07, "loss": -0.0087, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1714.0, "completions/max_terminated_length": 1714.0, "completions/mean_length": 699.8392944335938, "completions/mean_terminated_length": 699.8392944335938, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.666135847568512, "epoch": 0.9246575342465754, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.0578175154305762, "kl": 0.021104201674461365, "learning_rate": 4.770547945205479e-07, "loss": -0.0114, "num_tokens": 2715932.0, "reward": 0.2040422260761261, "reward_std": 0.08861686289310455, "rewards/check_gptzero_func/mean": 0.2040422111749649, "rewards/check_gptzero_func/std": 0.24008065462112427, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9995146989822388, "sampling/importance_sampling_ratio/min": 0.19775307178497314, "sampling/sampling_logp_difference/max": 1.6207361221313477, "sampling/sampling_logp_difference/mean": 0.023150267079472542, "step": 135 }, { "clip_ratio/high_max": 0.005826499778777361, "clip_ratio/high_mean": 0.004584095906466246, "clip_ratio/low_mean": 0.0022271359339356422, "clip_ratio/low_min": 0.0012575453147292137, "clip_ratio/region_mean": 0.006811231840401888, "entropy": 0.666135847568512, "epoch": 0.9315068493150684, "grad_norm": 0.9912000148320095, "kl": 0.024332718923687935, "learning_rate": 4.768835616438356e-07, "loss": -0.0148, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2172.0, "completions/max_terminated_length": 2172.0, "completions/mean_length": 660.9464721679688, "completions/mean_terminated_length": 660.9464721679688, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "entropy": 0.6205853223800659, "epoch": 0.9383561643835616, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.1357369726269317, "kl": 0.023902926594018936, "learning_rate": 4.7671232876712324e-07, "loss": 0.006, "num_tokens": 2758133.0, "reward": 0.33098793029785156, "reward_std": 0.08812516182661057, "rewards/check_gptzero_func/mean": 0.33098793029785156, "rewards/check_gptzero_func/std": 0.35234519839286804, "sampling/importance_sampling_ratio/max": 1.6149073839187622, "sampling/importance_sampling_ratio/mean": 0.9998918175697327, "sampling/importance_sampling_ratio/min": 0.4059228301048279, "sampling/sampling_logp_difference/max": 0.9015922546386719, "sampling/sampling_logp_difference/mean": 0.021671123802661896, "step": 137 }, { "clip_ratio/high_max": 0.005959982983767986, "clip_ratio/high_mean": 0.0038348864763975143, "clip_ratio/low_mean": 0.002395733492448926, "clip_ratio/low_min": 0.001361624919809401, "clip_ratio/region_mean": 0.006230620201677084, "entropy": 0.621531069278717, "epoch": 0.9452054794520548, "grad_norm": 1.0137977241917147, "kl": 0.02417910099029541, "learning_rate": 4.7654109589041094e-07, "loss": 0.0025, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 475.3214416503906, "completions/mean_terminated_length": 475.3214416503906, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.6226816177368164, "epoch": 0.952054794520548, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.2427971272830265, "kl": 0.02777375467121601, "learning_rate": 4.7636986301369864e-07, "loss": -0.0154, "num_tokens": 2789405.0, "reward": 0.3171728253364563, "reward_std": 0.06632539629936218, "rewards/check_gptzero_func/mean": 0.3171727955341339, "rewards/check_gptzero_func/std": 0.36973050236701965, "sampling/importance_sampling_ratio/max": 1.4759069681167603, "sampling/importance_sampling_ratio/mean": 0.9992812275886536, "sampling/importance_sampling_ratio/min": 0.6056217551231384, "sampling/sampling_logp_difference/max": 0.5014996528625488, "sampling/sampling_logp_difference/mean": 0.022005785256624222, "step": 139 }, { "clip_ratio/high_max": 0.008890085853636265, "clip_ratio/high_mean": 0.006118430756032467, "clip_ratio/low_mean": 0.002570927608758211, "clip_ratio/low_min": 0.0017113519133999944, "clip_ratio/region_mean": 0.008689358830451965, "entropy": 0.6254869699478149, "epoch": 0.958904109589041, "grad_norm": 1.1727135965557849, "kl": 0.02728389762341976, "learning_rate": 4.761986301369863e-07, "loss": -0.0192, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 527.7678833007812, "completions/mean_terminated_length": 527.7678833007812, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.584328830242157, "epoch": 0.9657534246575342, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.182197861943131, "kl": 0.033146511763334274, "learning_rate": 4.7602739726027394e-07, "loss": -0.0046, "num_tokens": 2823298.0, "reward": 0.2811133563518524, "reward_std": 0.062341947108507156, "rewards/check_gptzero_func/mean": 0.2811133563518524, "rewards/check_gptzero_func/std": 0.334384024143219, "sampling/importance_sampling_ratio/max": 1.603974461555481, "sampling/importance_sampling_ratio/mean": 1.0001323223114014, "sampling/importance_sampling_ratio/min": 0.1674417406320572, "sampling/sampling_logp_difference/max": 1.787119746208191, "sampling/sampling_logp_difference/mean": 0.02174796536564827, "step": 141 }, { "clip_ratio/high_max": 0.007302405312657356, "clip_ratio/high_mean": 0.005393455736339092, "clip_ratio/low_mean": 0.0028298720717430115, "clip_ratio/low_min": 0.0015216839965432882, "clip_ratio/region_mean": 0.008223327808082104, "entropy": 0.5856890082359314, "epoch": 0.9726027397260274, "grad_norm": 1.1360290364223442, "kl": 0.03308216109871864, "learning_rate": 4.7585616438356165e-07, "loss": -0.0084, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1120.0, "completions/max_terminated_length": 1120.0, "completions/mean_length": 626.5, "completions/mean_terminated_length": 626.5, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "entropy": 0.6396209001541138, "epoch": 0.9794520547945206, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.0829181615455132, "kl": 0.020728664472699165, "learning_rate": 4.756849315068493e-07, "loss": -0.0205, "num_tokens": 2863354.0, "reward": 0.30012255907058716, "reward_std": 0.10973531007766724, "rewards/check_gptzero_func/mean": 0.30012252926826477, "rewards/check_gptzero_func/std": 0.3197723925113678, "sampling/importance_sampling_ratio/max": 1.580580711364746, "sampling/importance_sampling_ratio/mean": 0.9996016621589661, "sampling/importance_sampling_ratio/min": 0.4598880112171173, "sampling/sampling_logp_difference/max": 0.7767722606658936, "sampling/sampling_logp_difference/mean": 0.022375155240297318, "step": 143 }, { "clip_ratio/high_max": 0.00548515236005187, "clip_ratio/high_mean": 0.004140198230743408, "clip_ratio/low_mean": 0.002350926399230957, "clip_ratio/low_min": 0.0009457159321755171, "clip_ratio/region_mean": 0.006491124629974365, "entropy": 0.6384310126304626, "epoch": 0.9863013698630136, "grad_norm": 1.0192472254462561, "kl": 0.021360469982028008, "learning_rate": 4.7551369863013695e-07, "loss": -0.0237, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1078.0, "completions/max_terminated_length": 1078.0, "completions/mean_length": 610.7857666015625, "completions/mean_terminated_length": 610.7857666015625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.6437399983406067, "epoch": 0.9931506849315068, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.1083792044283145, "kl": 0.02738848887383938, "learning_rate": 4.7534246575342465e-07, "loss": -0.0057, "num_tokens": 2902182.0, "reward": 0.267678827047348, "reward_std": 0.12891468405723572, "rewards/check_gptzero_func/mean": 0.26767879724502563, "rewards/check_gptzero_func/std": 0.306485652923584, "sampling/importance_sampling_ratio/max": 1.6292201280593872, "sampling/importance_sampling_ratio/mean": 1.0000885725021362, "sampling/importance_sampling_ratio/min": 0.5943025946617126, "sampling/sampling_logp_difference/max": 0.5203666687011719, "sampling/sampling_logp_difference/mean": 0.02244720794260502, "step": 145 }, { "clip_ratio/high_max": 0.00476327957585454, "clip_ratio/high_mean": 0.0038016592152416706, "clip_ratio/low_mean": 0.0028647694271057844, "clip_ratio/low_min": 0.002176616806536913, "clip_ratio/region_mean": 0.006666428409516811, "entropy": 0.6432503461837769, "epoch": 1.0, "grad_norm": 1.0377789016994055, "kl": 0.027046455070376396, "learning_rate": 4.751712328767123e-07, "loss": -0.0095, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 553.25, "completions/mean_terminated_length": 553.25, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.5897545218467712, "epoch": 1.0068493150684932, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.1204786081934237, "kl": 0.035217661410570145, "learning_rate": 4.7499999999999995e-07, "loss": -0.0128, "num_tokens": 2938120.0, "reward": 0.3714272081851959, "reward_std": 0.0896967425942421, "rewards/check_gptzero_func/mean": 0.3714272081851959, "rewards/check_gptzero_func/std": 0.36110618710517883, "sampling/importance_sampling_ratio/max": 1.7205108404159546, "sampling/importance_sampling_ratio/mean": 0.9994993209838867, "sampling/importance_sampling_ratio/min": 0.23330101370811462, "sampling/sampling_logp_difference/max": 1.45542573928833, "sampling/sampling_logp_difference/mean": 0.021586231887340546, "step": 147 }, { "clip_ratio/high_max": 0.006664734799414873, "clip_ratio/high_mean": 0.004718645475804806, "clip_ratio/low_mean": 0.0032096419017761946, "clip_ratio/low_min": 0.001457017962820828, "clip_ratio/region_mean": 0.007928287610411644, "entropy": 0.5915244221687317, "epoch": 1.0136986301369864, "grad_norm": 1.283282613384224, "kl": 0.03492439538240433, "learning_rate": 4.7482876712328766e-07, "loss": -0.0163, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1163.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 485.83929443359375, "completions/mean_terminated_length": 485.83929443359375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.6291061043739319, "epoch": 1.0205479452054795, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2784807316841476, "kl": 0.041249435395002365, "learning_rate": 4.7465753424657536e-07, "loss": 0.003, "num_tokens": 2970149.0, "reward": 0.3835770785808563, "reward_std": 0.07471778988838196, "rewards/check_gptzero_func/mean": 0.3835770785808563, "rewards/check_gptzero_func/std": 0.39595159888267517, "sampling/importance_sampling_ratio/max": 1.515424132347107, "sampling/importance_sampling_ratio/mean": 0.9996601939201355, "sampling/importance_sampling_ratio/min": 0.5938921570777893, "sampling/sampling_logp_difference/max": 0.5210576057434082, "sampling/sampling_logp_difference/mean": 0.0225253626704216, "step": 149 }, { "clip_ratio/high_max": 0.006103375926613808, "clip_ratio/high_mean": 0.004323169123381376, "clip_ratio/low_mean": 0.0028842499013990164, "clip_ratio/low_min": 0.000650618108920753, "clip_ratio/region_mean": 0.007207419257611036, "entropy": 0.6277785301208496, "epoch": 1.0273972602739727, "grad_norm": 1.16502398843688, "kl": 0.04355955868959427, "learning_rate": 4.74486301369863e-07, "loss": -0.0008, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1715.0, "completions/max_terminated_length": 1715.0, "completions/mean_length": 543.232177734375, "completions/mean_terminated_length": 543.232177734375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.5971288681030273, "epoch": 1.0342465753424657, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.1240269668683984, "kl": 0.03371803089976311, "learning_rate": 4.7431506849315066e-07, "loss": 0.0164, "num_tokens": 3005578.0, "reward": 0.45768895745277405, "reward_std": 0.10103446990251541, "rewards/check_gptzero_func/mean": 0.45768895745277405, "rewards/check_gptzero_func/std": 0.34442585706710815, "sampling/importance_sampling_ratio/max": 1.6088849306106567, "sampling/importance_sampling_ratio/mean": 1.0000921487808228, "sampling/importance_sampling_ratio/min": 0.5127859115600586, "sampling/sampling_logp_difference/max": 0.6678968667984009, "sampling/sampling_logp_difference/mean": 0.021564338356256485, "step": 151 }, { "clip_ratio/high_max": 0.0062953997403383255, "clip_ratio/high_mean": 0.004520761780440807, "clip_ratio/low_mean": 0.002700344193726778, "clip_ratio/low_min": 0.0013127666898071766, "clip_ratio/region_mean": 0.007221105974167585, "entropy": 0.5979951024055481, "epoch": 1.0410958904109588, "grad_norm": 1.1137305751895272, "kl": 0.03382372111082077, "learning_rate": 4.7414383561643836e-07, "loss": 0.0126, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1570.0, "completions/max_terminated_length": 1570.0, "completions/mean_length": 508.0714416503906, "completions/mean_terminated_length": 508.0714416503906, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "entropy": 0.5809063911437988, "epoch": 1.047945205479452, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.1694051183297829, "kl": 0.03906629607081413, "learning_rate": 4.73972602739726e-07, "loss": 0.0015, "num_tokens": 3039290.0, "reward": 0.3975506126880646, "reward_std": 0.06464671343564987, "rewards/check_gptzero_func/mean": 0.3975505828857422, "rewards/check_gptzero_func/std": 0.34004032611846924, "sampling/importance_sampling_ratio/max": 1.8019201755523682, "sampling/importance_sampling_ratio/mean": 1.0000883340835571, "sampling/importance_sampling_ratio/min": 0.5038098692893982, "sampling/sampling_logp_difference/max": 0.6855564117431641, "sampling/sampling_logp_difference/mean": 0.02144979126751423, "step": 153 }, { "clip_ratio/high_max": 0.006650332361459732, "clip_ratio/high_mean": 0.00530422804877162, "clip_ratio/low_mean": 0.0025444270577281713, "clip_ratio/low_min": 0.0012422360014170408, "clip_ratio/region_mean": 0.007848655804991722, "entropy": 0.5809063911437988, "epoch": 1.0547945205479452, "grad_norm": 1.1155856414275849, "kl": 0.0375567264854908, "learning_rate": 4.7380136986301366e-07, "loss": -0.0025, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2301.0, "completions/max_terminated_length": 2301.0, "completions/mean_length": 579.482177734375, "completions/mean_terminated_length": 579.482177734375, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.6319945454597473, "epoch": 1.0616438356164384, "frac_reward_zero_std": 0.0, "grad_norm": 1.1119663736483072, "kl": 0.038867104798555374, "learning_rate": 4.736301369863013e-07, "loss": 0.0163, "num_tokens": 3076705.0, "reward": 0.35941699147224426, "reward_std": 0.06306131184101105, "rewards/check_gptzero_func/mean": 0.35941699147224426, "rewards/check_gptzero_func/std": 0.35537707805633545, "sampling/importance_sampling_ratio/max": 1.589264988899231, "sampling/importance_sampling_ratio/mean": 0.9999123811721802, "sampling/importance_sampling_ratio/min": 0.4161491394042969, "sampling/sampling_logp_difference/max": 0.8767116069793701, "sampling/sampling_logp_difference/mean": 0.022652899846434593, "step": 155 }, { "clip_ratio/high_max": 0.006108202505856752, "clip_ratio/high_mean": 0.004038949031382799, "clip_ratio/low_mean": 0.0021810862235724926, "clip_ratio/low_min": 0.00141643057577312, "clip_ratio/region_mean": 0.006220035254955292, "entropy": 0.6341208815574646, "epoch": 1.0684931506849316, "grad_norm": 1.0563229317847904, "kl": 0.03734448179602623, "learning_rate": 4.73458904109589e-07, "loss": 0.0125, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1519.0, "completions/max_terminated_length": 1519.0, "completions/mean_length": 552.6607666015625, "completions/mean_terminated_length": 552.6607666015625, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.6409028768539429, "epoch": 1.0753424657534247, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.1898376772512127, "kl": 0.04016624018549919, "learning_rate": 4.732876712328767e-07, "loss": -0.0194, "num_tokens": 3112724.0, "reward": 0.35917267203330994, "reward_std": 0.10156037658452988, "rewards/check_gptzero_func/mean": 0.35917264223098755, "rewards/check_gptzero_func/std": 0.3524096608161926, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000478982925415, "sampling/importance_sampling_ratio/min": 0.5556181073188782, "sampling/sampling_logp_difference/max": 0.8411612510681152, "sampling/sampling_logp_difference/mean": 0.02277453988790512, "step": 157 }, { "clip_ratio/high_max": 0.0058875479735434055, "clip_ratio/high_mean": 0.004838725086301565, "clip_ratio/low_mean": 0.0031296375673264265, "clip_ratio/low_min": 0.0008020532550290227, "clip_ratio/region_mean": 0.007968363352119923, "entropy": 0.642086386680603, "epoch": 1.0821917808219177, "grad_norm": 1.106724947616869, "kl": 0.04106585308909416, "learning_rate": 4.7311643835616437e-07, "loss": -0.0234, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1734.0, "completions/max_terminated_length": 1734.0, "completions/mean_length": 693.8392944335938, "completions/mean_terminated_length": 693.8392944335938, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "entropy": 0.6859130263328552, "epoch": 1.0890410958904109, "frac_reward_zero_std": 0.0, "grad_norm": 1.0581804370745036, "kl": 0.040246013551950455, "learning_rate": 4.72945205479452e-07, "loss": 0.0265, "num_tokens": 3156275.0, "reward": 0.27599212527275085, "reward_std": 0.07160905003547668, "rewards/check_gptzero_func/mean": 0.27599212527275085, "rewards/check_gptzero_func/std": 0.2860566973686218, "sampling/importance_sampling_ratio/max": 1.575162410736084, "sampling/importance_sampling_ratio/mean": 0.9999051690101624, "sampling/importance_sampling_ratio/min": 0.5909302830696106, "sampling/sampling_logp_difference/max": 0.526057243347168, "sampling/sampling_logp_difference/mean": 0.022972745820879936, "step": 159 }, { "clip_ratio/high_max": 0.004735969472676516, "clip_ratio/high_mean": 0.003851883579045534, "clip_ratio/low_mean": 0.0027019104454666376, "clip_ratio/low_min": 0.0017328712856397033, "clip_ratio/region_mean": 0.006553794257342815, "entropy": 0.6839567422866821, "epoch": 1.095890410958904, "grad_norm": 0.9921972813648924, "kl": 0.041032422333955765, "learning_rate": 4.727739726027397e-07, "loss": 0.0225, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 445.9285888671875, "completions/mean_terminated_length": 445.9285888671875, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "entropy": 0.4973154664039612, "epoch": 1.1027397260273972, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.263890270660141, "kl": 0.06109704449772835, "learning_rate": 4.726027397260274e-07, "loss": 0.0148, "num_tokens": 3186383.0, "reward": 0.4045005738735199, "reward_std": 0.05171382054686546, "rewards/check_gptzero_func/mean": 0.4045005440711975, "rewards/check_gptzero_func/std": 0.3871311545372009, "sampling/importance_sampling_ratio/max": 1.7827144861221313, "sampling/importance_sampling_ratio/mean": 1.000075340270996, "sampling/importance_sampling_ratio/min": 0.6094236373901367, "sampling/sampling_logp_difference/max": 0.5781371593475342, "sampling/sampling_logp_difference/mean": 0.019563062116503716, "step": 161 }, { "clip_ratio/high_max": 0.009743589907884598, "clip_ratio/high_mean": 0.005534491967409849, "clip_ratio/low_mean": 0.0034172481391578913, "clip_ratio/low_min": 0.0021635654848068953, "clip_ratio/region_mean": 0.008951740339398384, "entropy": 0.4973154664039612, "epoch": 1.1095890410958904, "grad_norm": 3.855256035821727, "kl": 0.059415630996227264, "learning_rate": 4.72431506849315e-07, "loss": 0.0104, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1365.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 531.2142944335938, "completions/mean_terminated_length": 531.2142944335938, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.6575630307197571, "epoch": 1.1164383561643836, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.159795064186088, "kl": 0.0562640056014061, "learning_rate": 4.7226027397260273e-07, "loss": -0.0032, "num_tokens": 3220865.0, "reward": 0.22683337330818176, "reward_std": 0.07970117032527924, "rewards/check_gptzero_func/mean": 0.22683335840702057, "rewards/check_gptzero_func/std": 0.31301262974739075, "sampling/importance_sampling_ratio/max": 1.8458366394042969, "sampling/importance_sampling_ratio/mean": 1.000009298324585, "sampling/importance_sampling_ratio/min": 0.5422559976577759, "sampling/sampling_logp_difference/max": 0.6129326820373535, "sampling/sampling_logp_difference/mean": 0.022829027846455574, "step": 163 }, { "clip_ratio/high_max": 0.007997935637831688, "clip_ratio/high_mean": 0.0054555488750338554, "clip_ratio/low_mean": 0.002649136586114764, "clip_ratio/low_min": 0.0015376729425042868, "clip_ratio/region_mean": 0.008104685693979263, "entropy": 0.6580537557601929, "epoch": 1.1232876712328768, "grad_norm": 1.127610021225118, "kl": 0.056114885956048965, "learning_rate": 4.720890410958904e-07, "loss": -0.0073, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 520.607177734375, "completions/mean_terminated_length": 520.607177734375, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.5837265849113464, "epoch": 1.13013698630137, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.2276788086972423, "kl": 0.0606423057615757, "learning_rate": 4.7191780821917803e-07, "loss": -0.006, "num_tokens": 3254441.0, "reward": 0.3828774690628052, "reward_std": 0.07432108372449875, "rewards/check_gptzero_func/mean": 0.3828774094581604, "rewards/check_gptzero_func/std": 0.36139386892318726, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9998732805252075, "sampling/importance_sampling_ratio/min": 0.540435791015625, "sampling/sampling_logp_difference/max": 0.8759260177612305, "sampling/sampling_logp_difference/mean": 0.02153969556093216, "step": 165 }, { "clip_ratio/high_max": 0.006758278701454401, "clip_ratio/high_mean": 0.0054850587621331215, "clip_ratio/low_mean": 0.0033581103198230267, "clip_ratio/low_min": 0.0018357044318690896, "clip_ratio/region_mean": 0.00884316861629486, "entropy": 0.5843852162361145, "epoch": 1.1369863013698631, "grad_norm": 1.126372410747043, "kl": 0.06073697283864021, "learning_rate": 4.7174657534246573e-07, "loss": -0.0103, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1808.0, "completions/max_terminated_length": 1808.0, "completions/mean_length": 644.6607666015625, "completions/mean_terminated_length": 644.6607666015625, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "entropy": 0.5936325192451477, "epoch": 1.143835616438356, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.0686164805519536, "kl": 0.0434647873044014, "learning_rate": 4.7157534246575344e-07, "loss": -0.0185, "num_tokens": 3295450.0, "reward": 0.32595130801200867, "reward_std": 0.06080653518438339, "rewards/check_gptzero_func/mean": 0.32595130801200867, "rewards/check_gptzero_func/std": 0.3229353427886963, "sampling/importance_sampling_ratio/max": 1.596354603767395, "sampling/importance_sampling_ratio/mean": 1.000238299369812, "sampling/importance_sampling_ratio/min": 0.36005598306655884, "sampling/sampling_logp_difference/max": 1.0214958190917969, "sampling/sampling_logp_difference/mean": 0.021432822570204735, "step": 167 }, { "clip_ratio/high_max": 0.006920947227627039, "clip_ratio/high_mean": 0.005593774374574423, "clip_ratio/low_mean": 0.0027995456475764513, "clip_ratio/low_min": 0.0020968755707144737, "clip_ratio/region_mean": 0.008393320254981518, "entropy": 0.5953138470649719, "epoch": 1.1506849315068493, "grad_norm": 1.0165699461533688, "kl": 0.04185573384165764, "learning_rate": 4.714041095890411e-07, "loss": -0.0221, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 533.8392944335938, "completions/mean_terminated_length": 533.8392944335938, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.5661647319793701, "epoch": 1.1575342465753424, "frac_reward_zero_std": 0.0, "grad_norm": 1.2329541484516777, "kl": 0.060922060161828995, "learning_rate": 4.7123287671232874e-07, "loss": 0.0061, "num_tokens": 3330607.0, "reward": 0.26275524497032166, "reward_std": 0.14397768676280975, "rewards/check_gptzero_func/mean": 0.26275521516799927, "rewards/check_gptzero_func/std": 0.2985629439353943, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0001331567764282, "sampling/importance_sampling_ratio/min": 0.41622254252433777, "sampling/sampling_logp_difference/max": 0.9273507595062256, "sampling/sampling_logp_difference/mean": 0.021996768191456795, "step": 169 }, { "clip_ratio/high_max": 0.00714089535176754, "clip_ratio/high_mean": 0.006244123913347721, "clip_ratio/low_mean": 0.003401209134608507, "clip_ratio/low_min": 0.0019376089330762625, "clip_ratio/region_mean": 0.009645332582294941, "entropy": 0.567020058631897, "epoch": 1.1643835616438356, "grad_norm": 2.0223432155796086, "kl": 0.10145194083452225, "learning_rate": 4.7106164383561644e-07, "loss": 0.0026, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1684.0, "completions/max_terminated_length": 1684.0, "completions/mean_length": 564.8392944335938, "completions/mean_terminated_length": 564.8392944335938, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.569060742855072, "epoch": 1.1712328767123288, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.1696600929310035, "kl": 0.05737113580107689, "learning_rate": 4.708904109589041e-07, "loss": -0.0128, "num_tokens": 3367182.0, "reward": 0.3576919734477997, "reward_std": 0.13187801837921143, "rewards/check_gptzero_func/mean": 0.3576919734477997, "rewards/check_gptzero_func/std": 0.2932879626750946, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9999371767044067, "sampling/importance_sampling_ratio/min": 0.3889715373516083, "sampling/sampling_logp_difference/max": 1.0066994428634644, "sampling/sampling_logp_difference/mean": 0.021621789783239365, "step": 171 }, { "clip_ratio/high_max": 0.008169440552592278, "clip_ratio/high_mean": 0.00608323747292161, "clip_ratio/low_mean": 0.003103714669123292, "clip_ratio/low_min": 0.0016588332364335656, "clip_ratio/region_mean": 0.009186952374875546, "entropy": 0.5708850622177124, "epoch": 1.178082191780822, "grad_norm": 1.1007012587726441, "kl": 0.05490586534142494, "learning_rate": 4.7071917808219174e-07, "loss": -0.0168, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 459.5000305175781, "completions/mean_terminated_length": 459.5000305175781, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.5469130873680115, "epoch": 1.1849315068493151, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3075032024527684, "kl": 0.09730887413024902, "learning_rate": 4.705479452054794e-07, "loss": -0.0062, "num_tokens": 3398066.0, "reward": 0.3857029676437378, "reward_std": 0.1038086786866188, "rewards/check_gptzero_func/mean": 0.3857029378414154, "rewards/check_gptzero_func/std": 0.3087492883205414, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0001980066299438, "sampling/importance_sampling_ratio/min": 0.4551142752170563, "sampling/sampling_logp_difference/max": 0.787206768989563, "sampling/sampling_logp_difference/mean": 0.02154473401606083, "step": 173 }, { "clip_ratio/high_max": 0.009536194615066051, "clip_ratio/high_mean": 0.005864343605935574, "clip_ratio/low_mean": 0.0032675382681190968, "clip_ratio/low_min": 0.0016806722851470113, "clip_ratio/region_mean": 0.009131881408393383, "entropy": 0.5485271215438843, "epoch": 1.191780821917808, "grad_norm": 1.1925860319388324, "kl": 0.09295064210891724, "learning_rate": 4.703767123287671e-07, "loss": -0.0106, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1110.0, "completions/max_terminated_length": 1110.0, "completions/mean_length": 452.0000305175781, "completions/mean_terminated_length": 452.0000305175781, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "entropy": 0.5607518553733826, "epoch": 1.1986301369863013, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 4.607258431079398, "kl": 0.2087676078081131, "learning_rate": 4.702054794520548e-07, "loss": 0.0043, "num_tokens": 3428390.0, "reward": 0.42193958163261414, "reward_std": 0.06886405497789383, "rewards/check_gptzero_func/mean": 0.42193958163261414, "rewards/check_gptzero_func/std": 0.3535751402378082, "sampling/importance_sampling_ratio/max": 1.4999483823776245, "sampling/importance_sampling_ratio/mean": 0.9999213814735413, "sampling/importance_sampling_ratio/min": 0.5368167757987976, "sampling/sampling_logp_difference/max": 0.622098445892334, "sampling/sampling_logp_difference/mean": 0.02075863815844059, "step": 175 }, { "clip_ratio/high_max": 0.009870129637420177, "clip_ratio/high_mean": 0.005877919029444456, "clip_ratio/low_mean": 0.004198528826236725, "clip_ratio/low_min": 0.0018559136660769582, "clip_ratio/region_mean": 0.010076448321342468, "entropy": 0.5654398202896118, "epoch": 1.2054794520547945, "grad_norm": 5.5380589995008584, "kl": 0.06258005648851395, "learning_rate": 4.7003424657534245e-07, "loss": 0.0292, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1151.0, "completions/max_terminated_length": 1151.0, "completions/mean_length": 463.5000305175781, "completions/mean_terminated_length": 463.5000305175781, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.5717237591743469, "epoch": 1.2123287671232876, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.793121510646931, "kl": 0.08763088285923004, "learning_rate": 4.6986301369863015e-07, "loss": 0.0054, "num_tokens": 3459276.0, "reward": 0.3360385298728943, "reward_std": 0.08226442337036133, "rewards/check_gptzero_func/mean": 0.3360385298728943, "rewards/check_gptzero_func/std": 0.330798476934433, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0004316568374634, "sampling/importance_sampling_ratio/min": 0.4787577688694, "sampling/sampling_logp_difference/max": 0.7890095710754395, "sampling/sampling_logp_difference/mean": 0.021380485966801643, "step": 177 }, { "clip_ratio/high_max": 0.006433453876525164, "clip_ratio/high_mean": 0.004122748970985413, "clip_ratio/low_mean": 0.002760890871286392, "clip_ratio/low_min": 0.002107037464156747, "clip_ratio/region_mean": 0.006883639842271805, "entropy": 0.5724051594734192, "epoch": 1.2191780821917808, "grad_norm": 1.212991607454397, "kl": 0.08736127614974976, "learning_rate": 4.696917808219178e-07, "loss": 0.0014, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 981.0, "completions/max_terminated_length": 981.0, "completions/mean_length": 375.6607360839844, "completions/mean_terminated_length": 375.6607360839844, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.5554942488670349, "epoch": 1.226027397260274, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.4652775466000865, "kl": 0.08852525800466537, "learning_rate": 4.6952054794520545e-07, "loss": -0.0003, "num_tokens": 3484703.0, "reward": 0.42197003960609436, "reward_std": 0.10775910317897797, "rewards/check_gptzero_func/mean": 0.42197006940841675, "rewards/check_gptzero_func/std": 0.3908144533634186, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000766396522522, "sampling/importance_sampling_ratio/min": 0.5082383155822754, "sampling/sampling_logp_difference/max": 0.8379387855529785, "sampling/sampling_logp_difference/mean": 0.022145846858620644, "step": 179 }, { "clip_ratio/high_max": 0.01029159501194954, "clip_ratio/high_mean": 0.0072182826697826385, "clip_ratio/low_mean": 0.0052956086583435535, "clip_ratio/low_min": 0.002088305540382862, "clip_ratio/region_mean": 0.012513890862464905, "entropy": 0.5564010739326477, "epoch": 1.2328767123287672, "grad_norm": 1.319464482201757, "kl": 0.0860695168375969, "learning_rate": 4.693493150684931e-07, "loss": -0.006, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1182.0, "completions/max_terminated_length": 1182.0, "completions/mean_length": 499.8750305175781, "completions/mean_terminated_length": 499.8750305175781, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "entropy": 0.5578697323799133, "epoch": 1.2397260273972603, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.2508143984793298, "kl": 0.0803849846124649, "learning_rate": 4.691780821917808e-07, "loss": 0.003, "num_tokens": 3517742.0, "reward": 0.3124339282512665, "reward_std": 0.15812118351459503, "rewards/check_gptzero_func/mean": 0.3124339282512665, "rewards/check_gptzero_func/std": 0.2902050316333771, "sampling/importance_sampling_ratio/max": 1.7511475086212158, "sampling/importance_sampling_ratio/mean": 0.9997240900993347, "sampling/importance_sampling_ratio/min": 0.575425386428833, "sampling/sampling_logp_difference/max": 0.5602712631225586, "sampling/sampling_logp_difference/mean": 0.021086815744638443, "step": 181 }, { "clip_ratio/high_max": 0.00817661453038454, "clip_ratio/high_mean": 0.005937420763075352, "clip_ratio/low_mean": 0.0033371353056281805, "clip_ratio/low_min": 0.0021450857166200876, "clip_ratio/region_mean": 0.009274556301534176, "entropy": 0.5578427314758301, "epoch": 1.2465753424657535, "grad_norm": 1.1744135402847882, "kl": 0.074305959045887, "learning_rate": 4.6900684931506846e-07, "loss": -0.0016, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 456.1964416503906, "completions/mean_terminated_length": 456.1964416503906, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.6250082850456238, "epoch": 1.2534246575342465, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.2839021841218292, "kl": 0.09042422473430634, "learning_rate": 4.688356164383561e-07, "loss": 0.0037, "num_tokens": 3547961.0, "reward": 0.43827104568481445, "reward_std": 0.07413661479949951, "rewards/check_gptzero_func/mean": 0.4382709860801697, "rewards/check_gptzero_func/std": 0.3435516655445099, "sampling/importance_sampling_ratio/max": 1.6826283931732178, "sampling/importance_sampling_ratio/mean": 1.0003445148468018, "sampling/importance_sampling_ratio/min": 0.5145659446716309, "sampling/sampling_logp_difference/max": 0.6644315719604492, "sampling/sampling_logp_difference/mean": 0.0225189421325922, "step": 183 }, { "clip_ratio/high_max": 0.009034711867570877, "clip_ratio/high_mean": 0.005353555083274841, "clip_ratio/low_mean": 0.0033751337323337793, "clip_ratio/low_min": 0.0015885623870417476, "clip_ratio/region_mean": 0.008728688582777977, "entropy": 0.6260138750076294, "epoch": 1.2602739726027397, "grad_norm": 1.2287577018015743, "kl": 0.08753658086061478, "learning_rate": 4.6866438356164386e-07, "loss": -0.0009, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1005.0, "completions/max_terminated_length": 1005.0, "completions/mean_length": 469.607177734375, "completions/mean_terminated_length": 469.607177734375, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.5850890278816223, "epoch": 1.2671232876712328, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3347021877938037, "kl": 0.0904083400964737, "learning_rate": 4.684931506849315e-07, "loss": -0.0007, "num_tokens": 3579179.0, "reward": 0.353742390871048, "reward_std": 0.12633375823497772, "rewards/check_gptzero_func/mean": 0.353742390871048, "rewards/check_gptzero_func/std": 0.3476913273334503, "sampling/importance_sampling_ratio/max": 1.710560917854309, "sampling/importance_sampling_ratio/mean": 0.999818742275238, "sampling/importance_sampling_ratio/min": 0.3659972846508026, "sampling/sampling_logp_difference/max": 1.005129337310791, "sampling/sampling_logp_difference/mean": 0.021339626982808113, "step": 185 }, { "clip_ratio/high_max": 0.009042719379067421, "clip_ratio/high_mean": 0.007245394866913557, "clip_ratio/low_mean": 0.004095069598406553, "clip_ratio/low_min": 0.0027033670339733362, "clip_ratio/region_mean": 0.01134046446532011, "entropy": 0.5881333351135254, "epoch": 1.273972602739726, "grad_norm": 1.4040829444463572, "kl": 0.09740336984395981, "learning_rate": 4.6832191780821916e-07, "loss": -0.0054, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1122.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 460.9107360839844, "completions/mean_terminated_length": 460.9107360839844, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.6158580780029297, "epoch": 1.2808219178082192, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.349902670996695, "kl": 0.09852626174688339, "learning_rate": 4.681506849315068e-07, "loss": -0.0118, "num_tokens": 3609596.0, "reward": 0.3164547383785248, "reward_std": 0.13617688417434692, "rewards/check_gptzero_func/mean": 0.3164547085762024, "rewards/check_gptzero_func/std": 0.337516188621521, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0000956058502197, "sampling/importance_sampling_ratio/min": 0.5718286633491516, "sampling/sampling_logp_difference/max": 0.7283892631530762, "sampling/sampling_logp_difference/mean": 0.022588754072785378, "step": 187 }, { "clip_ratio/high_max": 0.0079647870734334, "clip_ratio/high_mean": 0.00608954718336463, "clip_ratio/low_mean": 0.0034534947481006384, "clip_ratio/low_min": 0.001953125, "clip_ratio/region_mean": 0.009543041698634624, "entropy": 0.6174706220626831, "epoch": 1.2876712328767124, "grad_norm": 1.3876833456160593, "kl": 0.09389277547597885, "learning_rate": 4.679794520547945e-07, "loss": -0.0164, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 418.7857360839844, "completions/mean_terminated_length": 418.7857360839844, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.5175802111625671, "epoch": 1.2945205479452055, "frac_reward_zero_std": 0.0, "grad_norm": 1.4115689558133495, "kl": 0.14761103689670563, "learning_rate": 4.6780821917808217e-07, "loss": -0.0212, "num_tokens": 3638134.0, "reward": 0.4534372091293335, "reward_std": 0.13802365958690643, "rewards/check_gptzero_func/mean": 0.4534372389316559, "rewards/check_gptzero_func/std": 0.3113403916358948, "sampling/importance_sampling_ratio/max": 1.639887809753418, "sampling/importance_sampling_ratio/mean": 0.9998306632041931, "sampling/importance_sampling_ratio/min": 0.44504058361053467, "sampling/sampling_logp_difference/max": 0.8095898628234863, "sampling/sampling_logp_difference/mean": 0.020337799564003944, "step": 189 }, { "clip_ratio/high_max": 0.011627906933426857, "clip_ratio/high_mean": 0.008181245066225529, "clip_ratio/low_mean": 0.004047483205795288, "clip_ratio/low_min": 0.001801531296223402, "clip_ratio/region_mean": 0.012228727340698242, "entropy": 0.5186796188354492, "epoch": 1.3013698630136985, "grad_norm": 1.2568866539326942, "kl": 0.15453574061393738, "learning_rate": 4.676369863013698e-07, "loss": -0.0257, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1552.0, "completions/max_terminated_length": 1552.0, "completions/mean_length": 567.7142944335938, "completions/mean_terminated_length": 567.7142944335938, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.6163981556892395, "epoch": 1.308219178082192, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.1913361711545072, "kl": 0.08680734783411026, "learning_rate": 4.6746575342465747e-07, "loss": -0.0249, "num_tokens": 3674008.0, "reward": 0.43954816460609436, "reward_std": 0.11196670681238174, "rewards/check_gptzero_func/mean": 0.43954816460609436, "rewards/check_gptzero_func/std": 0.32531142234802246, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0001375675201416, "sampling/importance_sampling_ratio/min": 0.617642879486084, "sampling/sampling_logp_difference/max": 0.8866510391235352, "sampling/sampling_logp_difference/mean": 0.022422688081860542, "step": 191 }, { "clip_ratio/high_max": 0.006706908345222473, "clip_ratio/high_mean": 0.005230219103395939, "clip_ratio/low_mean": 0.00342856184579432, "clip_ratio/low_min": 0.0017857142956927419, "clip_ratio/region_mean": 0.008658780716359615, "entropy": 0.6164445877075195, "epoch": 1.3150684931506849, "grad_norm": 1.094539674642609, "kl": 0.08543423563241959, "learning_rate": 4.6729452054794517e-07, "loss": -0.0292, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1139.0, "completions/max_terminated_length": 1139.0, "completions/mean_length": 485.21429443359375, "completions/mean_terminated_length": 485.21429443359375, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "entropy": 0.5696818828582764, "epoch": 1.321917808219178, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3091049425472885, "kl": 0.1006428673863411, "learning_rate": 4.671232876712329e-07, "loss": -0.0183, "num_tokens": 3706134.0, "reward": 0.36483731865882874, "reward_std": 0.13721340894699097, "rewards/check_gptzero_func/mean": 0.36483731865882874, "rewards/check_gptzero_func/std": 0.30223700404167175, "sampling/importance_sampling_ratio/max": 1.8603031635284424, "sampling/importance_sampling_ratio/mean": 0.9991811513900757, "sampling/importance_sampling_ratio/min": 0.5819392800331116, "sampling/sampling_logp_difference/max": 0.6207394599914551, "sampling/sampling_logp_difference/mean": 0.02245975285768509, "step": 193 }, { "clip_ratio/high_max": 0.007636122405529022, "clip_ratio/high_mean": 0.005867728032171726, "clip_ratio/low_mean": 0.0046557835303246975, "clip_ratio/low_min": 0.0027777778450399637, "clip_ratio/region_mean": 0.010523512028157711, "entropy": 0.5705229043960571, "epoch": 1.3287671232876712, "grad_norm": 1.1841537932752597, "kl": 0.09176606684923172, "learning_rate": 4.669520547945205e-07, "loss": -0.0233, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1229.0, "completions/max_terminated_length": 1229.0, "completions/mean_length": 444.26788330078125, "completions/mean_terminated_length": 444.26788330078125, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.5754863023757935, "epoch": 1.3356164383561644, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.343176270378971, "kl": 0.11526699364185333, "learning_rate": 4.6678082191780823e-07, "loss": 0.0096, "num_tokens": 3736351.0, "reward": 0.4112183749675751, "reward_std": 0.09473726898431778, "rewards/check_gptzero_func/mean": 0.4112183749675751, "rewards/check_gptzero_func/std": 0.3364427387714386, "sampling/importance_sampling_ratio/max": 1.8121576309204102, "sampling/importance_sampling_ratio/mean": 0.9998839497566223, "sampling/importance_sampling_ratio/min": 0.611285388469696, "sampling/sampling_logp_difference/max": 0.5945181846618652, "sampling/sampling_logp_difference/mean": 0.02203413099050522, "step": 195 }, { "clip_ratio/high_max": 0.008708273060619831, "clip_ratio/high_mean": 0.0061340960673987865, "clip_ratio/low_mean": 0.004276065621525049, "clip_ratio/low_min": 0.0014152850490063429, "clip_ratio/region_mean": 0.010410161688923836, "entropy": 0.5737610459327698, "epoch": 1.3424657534246576, "grad_norm": 1.7371417814734251, "kl": 0.11825668811798096, "learning_rate": 4.666095890410959e-07, "loss": 0.005, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1765.0, "completions/max_terminated_length": 1765.0, "completions/mean_length": 505.1785888671875, "completions/mean_terminated_length": 505.1785888671875, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5596179962158203, "epoch": 1.3493150684931507, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3022129653290084, "kl": 0.12006931006908417, "learning_rate": 4.6643835616438353e-07, "loss": 0.0078, "num_tokens": 3769459.0, "reward": 0.4011520743370056, "reward_std": 0.10693982988595963, "rewards/check_gptzero_func/mean": 0.4011520743370056, "rewards/check_gptzero_func/std": 0.30186727643013, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9997532963752747, "sampling/importance_sampling_ratio/min": 0.5104373097419739, "sampling/sampling_logp_difference/max": 0.7576985359191895, "sampling/sampling_logp_difference/mean": 0.02166983112692833, "step": 197 }, { "clip_ratio/high_max": 0.007274380419403315, "clip_ratio/high_mean": 0.005530033726245165, "clip_ratio/low_mean": 0.004096482414752245, "clip_ratio/low_min": 0.002030751435086131, "clip_ratio/region_mean": 0.00962651614099741, "entropy": 0.5604873299598694, "epoch": 1.356164383561644, "grad_norm": 1.172042598847619, "kl": 0.1155269667506218, "learning_rate": 4.662671232876712e-07, "loss": 0.003, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1541.0, "completions/max_terminated_length": 1541.0, "completions/mean_length": 514.1607666015625, "completions/mean_terminated_length": 514.1607666015625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.5928698778152466, "epoch": 1.3630136986301369, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2730861918779324, "kl": 0.11092351377010345, "learning_rate": 4.660958904109589e-07, "loss": -0.0255, "num_tokens": 3803320.0, "reward": 0.37175890803337097, "reward_std": 0.11942640691995621, "rewards/check_gptzero_func/mean": 0.37175890803337097, "rewards/check_gptzero_func/std": 0.26837804913520813, "sampling/importance_sampling_ratio/max": 1.8571339845657349, "sampling/importance_sampling_ratio/mean": 0.9998840689659119, "sampling/importance_sampling_ratio/min": 0.14770746231079102, "sampling/sampling_logp_difference/max": 1.9125216007232666, "sampling/sampling_logp_difference/mean": 0.02163623832166195, "step": 199 }, { "clip_ratio/high_max": 0.008676092140376568, "clip_ratio/high_mean": 0.006248102523386478, "clip_ratio/low_mean": 0.0039655184373259544, "clip_ratio/low_min": 0.0026284349150955677, "clip_ratio/region_mean": 0.010213620029389858, "entropy": 0.594885528087616, "epoch": 1.36986301369863, "grad_norm": 1.183068870337996, "kl": 0.10320527851581573, "learning_rate": 4.6592465753424653e-07, "loss": -0.0301, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 418.14288330078125, "completions/mean_terminated_length": 418.14288330078125, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.5198522210121155, "epoch": 1.3767123287671232, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3689988728897204, "kl": 0.15181109309196472, "learning_rate": 4.657534246575342e-07, "loss": 0.0014, "num_tokens": 3831748.0, "reward": 0.42511245608329773, "reward_std": 0.12990108132362366, "rewards/check_gptzero_func/mean": 0.42511245608329773, "rewards/check_gptzero_func/std": 0.3264060616493225, "sampling/importance_sampling_ratio/max": 1.565150260925293, "sampling/importance_sampling_ratio/mean": 1.0000152587890625, "sampling/importance_sampling_ratio/min": 0.4609906077384949, "sampling/sampling_logp_difference/max": 0.7743775844573975, "sampling/sampling_logp_difference/mean": 0.01964198611676693, "step": 201 }, { "clip_ratio/high_max": 0.011688311584293842, "clip_ratio/high_mean": 0.008110369555652142, "clip_ratio/low_mean": 0.004670106805860996, "clip_ratio/low_min": 0.0018140589818358421, "clip_ratio/region_mean": 0.012780477292835712, "entropy": 0.5219186544418335, "epoch": 1.3835616438356164, "grad_norm": 1.2470204247364223, "kl": 0.14427709579467773, "learning_rate": 4.6558219178082194e-07, "loss": -0.0041, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2700.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 396.26788330078125, "completions/mean_terminated_length": 396.26788330078125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.4977007508277893, "epoch": 1.3904109589041096, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.3840836444522164, "kl": 0.1553380787372589, "learning_rate": 4.654109589041096e-07, "loss": -0.0261, "num_tokens": 3858787.0, "reward": 0.5155433416366577, "reward_std": 0.08168389648199081, "rewards/check_gptzero_func/mean": 0.5155433416366577, "rewards/check_gptzero_func/std": 0.3670801818370819, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996607899665833, "sampling/importance_sampling_ratio/min": 0.5162184834480286, "sampling/sampling_logp_difference/max": 0.8144292831420898, "sampling/sampling_logp_difference/mean": 0.01891191303730011, "step": 203 }, { "clip_ratio/high_max": 0.008430609479546547, "clip_ratio/high_mean": 0.005497280042618513, "clip_ratio/low_mean": 0.004371709655970335, "clip_ratio/low_min": 0.0014297969173640013, "clip_ratio/region_mean": 0.009868989698588848, "entropy": 0.49819740653038025, "epoch": 1.3972602739726028, "grad_norm": 1.2789191089368288, "kl": 0.1512792408466339, "learning_rate": 4.6523972602739724e-07, "loss": -0.0315, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1175.0, "completions/max_terminated_length": 1175.0, "completions/mean_length": 454.2857360839844, "completions/mean_terminated_length": 454.2857360839844, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "entropy": 0.5509709119796753, "epoch": 1.404109589041096, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.2634687335314732, "kl": 0.13352139294147491, "learning_rate": 4.650684931506849e-07, "loss": -0.0113, "num_tokens": 3889173.0, "reward": 0.40115097165107727, "reward_std": 0.08552742749452591, "rewards/check_gptzero_func/mean": 0.4011509418487549, "rewards/check_gptzero_func/std": 0.2989251911640167, "sampling/importance_sampling_ratio/max": 1.8519060611724854, "sampling/importance_sampling_ratio/mean": 1.0001741647720337, "sampling/importance_sampling_ratio/min": 0.38417142629623413, "sampling/sampling_logp_difference/max": 0.9566664695739746, "sampling/sampling_logp_difference/mean": 0.02112649194896221, "step": 205 }, { "clip_ratio/high_max": 0.008432349190115929, "clip_ratio/high_mean": 0.006122333463281393, "clip_ratio/low_mean": 0.004176727961748838, "clip_ratio/low_min": 0.0028148149140179157, "clip_ratio/region_mean": 0.010299062356352806, "entropy": 0.5526365637779236, "epoch": 1.410958904109589, "grad_norm": 1.1727338756873105, "kl": 0.13099002838134766, "learning_rate": 4.648972602739726e-07, "loss": -0.0163, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 515.0357666015625, "completions/mean_terminated_length": 515.0357666015625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.5915073752403259, "epoch": 1.4178082191780823, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2453512595853422, "kl": 0.11347120255231857, "learning_rate": 4.6472602739726025e-07, "loss": 0.0028, "num_tokens": 3923241.0, "reward": 0.3543092906475067, "reward_std": 0.16193121671676636, "rewards/check_gptzero_func/mean": 0.3543092906475067, "rewards/check_gptzero_func/std": 0.2913196384906769, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9997066855430603, "sampling/importance_sampling_ratio/min": 0.49668651819229126, "sampling/sampling_logp_difference/max": 0.8566141128540039, "sampling/sampling_logp_difference/mean": 0.02183356136083603, "step": 207 }, { "clip_ratio/high_max": 0.008083140477538109, "clip_ratio/high_mean": 0.005844703409820795, "clip_ratio/low_mean": 0.0036968295462429523, "clip_ratio/low_min": 0.0019099591299891472, "clip_ratio/region_mean": 0.009541532956063747, "entropy": 0.5918721556663513, "epoch": 1.4246575342465753, "grad_norm": 1.1323621514862343, "kl": 0.1130494549870491, "learning_rate": 4.645547945205479e-07, "loss": -0.0021, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1544.0, "completions/max_terminated_length": 1544.0, "completions/mean_length": 372.14288330078125, "completions/mean_terminated_length": 372.14288330078125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.5215978622436523, "epoch": 1.4315068493150684, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.503663580603606, "kl": 0.15203510224819183, "learning_rate": 4.643835616438356e-07, "loss": 0.0073, "num_tokens": 3948797.0, "reward": 0.4123433232307434, "reward_std": 0.08679980784654617, "rewards/check_gptzero_func/mean": 0.412343293428421, "rewards/check_gptzero_func/std": 0.33011093735694885, "sampling/importance_sampling_ratio/max": 1.6199498176574707, "sampling/importance_sampling_ratio/mean": 0.9999299645423889, "sampling/importance_sampling_ratio/min": 0.5887268781661987, "sampling/sampling_logp_difference/max": 0.5297929048538208, "sampling/sampling_logp_difference/mean": 0.02023615688085556, "step": 209 }, { "clip_ratio/high_max": 0.01065162941813469, "clip_ratio/high_mean": 0.00830905046314001, "clip_ratio/low_mean": 0.0049112397246062756, "clip_ratio/low_min": 0.002977801952511072, "clip_ratio/region_mean": 0.013220290653407574, "entropy": 0.5230485796928406, "epoch": 1.4383561643835616, "grad_norm": 1.4941758407476036, "kl": 0.15498144924640656, "learning_rate": 4.6421232876712325e-07, "loss": 0.0021, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 431.5357360839844, "completions/mean_terminated_length": 431.5357360839844, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.5667777061462402, "epoch": 1.4452054794520548, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.3660204888166227, "kl": 0.1184803918004036, "learning_rate": 4.6404109589041095e-07, "loss": 0.0075, "num_tokens": 3977695.0, "reward": 0.43015435338020325, "reward_std": 0.09452430158853531, "rewards/check_gptzero_func/mean": 0.43015432357788086, "rewards/check_gptzero_func/std": 0.3596901595592499, "sampling/importance_sampling_ratio/max": 1.733376145362854, "sampling/importance_sampling_ratio/mean": 0.9997225999832153, "sampling/importance_sampling_ratio/min": 0.3463762700557709, "sampling/sampling_logp_difference/max": 1.0602295398712158, "sampling/sampling_logp_difference/mean": 0.02182575687766075, "step": 211 }, { "clip_ratio/high_max": 0.00809935200959444, "clip_ratio/high_mean": 0.006555064581334591, "clip_ratio/low_mean": 0.005421395879238844, "clip_ratio/low_min": 0.003071252955123782, "clip_ratio/region_mean": 0.011976459994912148, "entropy": 0.5644544363021851, "epoch": 1.452054794520548, "grad_norm": 1.2795839437839085, "kl": 0.1237228512763977, "learning_rate": 4.638698630136986e-07, "loss": 0.0022, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 382.9464416503906, "completions/mean_terminated_length": 382.9464416503906, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.515927791595459, "epoch": 1.4589041095890412, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3688476813197394, "kl": 0.14934222400188446, "learning_rate": 4.636986301369863e-07, "loss": -0.0012, "num_tokens": 4003694.0, "reward": 0.34954962134361267, "reward_std": 0.1108696237206459, "rewards/check_gptzero_func/mean": 0.3495495915412903, "rewards/check_gptzero_func/std": 0.3581481873989105, "sampling/importance_sampling_ratio/max": 1.8310199975967407, "sampling/importance_sampling_ratio/mean": 0.9997681379318237, "sampling/importance_sampling_ratio/min": 0.2932734787464142, "sampling/sampling_logp_difference/max": 1.2266497611999512, "sampling/sampling_logp_difference/mean": 0.02037951909005642, "step": 213 }, { "clip_ratio/high_max": 0.010078740306198597, "clip_ratio/high_mean": 0.008601653389632702, "clip_ratio/low_mean": 0.003990613855421543, "clip_ratio/low_min": 0.0014280613977462053, "clip_ratio/region_mean": 0.01259226631373167, "entropy": 0.515927791595459, "epoch": 1.4657534246575343, "grad_norm": 1.2119781344642764, "kl": 0.14840081334114075, "learning_rate": 4.6352739726027396e-07, "loss": -0.0067, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 547.357177734375, "completions/mean_terminated_length": 547.357177734375, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "entropy": 0.5944118499755859, "epoch": 1.4726027397260273, "frac_reward_zero_std": 0.0, "grad_norm": 1.205062610819, "kl": 0.1343906968832016, "learning_rate": 4.633561643835616e-07, "loss": -0.0172, "num_tokens": 4039354.0, "reward": 0.32754021883010864, "reward_std": 0.1144959032535553, "rewards/check_gptzero_func/mean": 0.32754018902778625, "rewards/check_gptzero_func/std": 0.2600123882293701, "sampling/importance_sampling_ratio/max": 1.5987446308135986, "sampling/importance_sampling_ratio/mean": 0.9997626543045044, "sampling/importance_sampling_ratio/min": 0.5918364524841309, "sampling/sampling_logp_difference/max": 0.5245249271392822, "sampling/sampling_logp_difference/mean": 0.022063614800572395, "step": 215 }, { "clip_ratio/high_max": 0.010693454183638096, "clip_ratio/high_mean": 0.006651707459241152, "clip_ratio/low_mean": 0.004254932515323162, "clip_ratio/low_min": 0.0016474464209750295, "clip_ratio/region_mean": 0.010906639508903027, "entropy": 0.5963621139526367, "epoch": 1.4794520547945205, "grad_norm": 1.1277374603285775, "kl": 0.13062940537929535, "learning_rate": 4.631849315068493e-07, "loss": -0.0226, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 415.33929443359375, "completions/mean_terminated_length": 415.33929443359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.6152611970901489, "epoch": 1.4863013698630136, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.423273254168082, "kl": 0.16969643533229828, "learning_rate": 4.6301369863013696e-07, "loss": -0.0132, "num_tokens": 4067645.0, "reward": 0.36642593145370483, "reward_std": 0.07929947227239609, "rewards/check_gptzero_func/mean": 0.36642593145370483, "rewards/check_gptzero_func/std": 0.31481847167015076, "sampling/importance_sampling_ratio/max": 1.608891487121582, "sampling/importance_sampling_ratio/mean": 1.0002304315567017, "sampling/importance_sampling_ratio/min": 0.6111520528793335, "sampling/sampling_logp_difference/max": 0.49240946769714355, "sampling/sampling_logp_difference/mean": 0.021663108840584755, "step": 217 }, { "clip_ratio/high_max": 0.009292352013289928, "clip_ratio/high_mean": 0.007104068994522095, "clip_ratio/low_mean": 0.004943522159010172, "clip_ratio/low_min": 0.0036968577187508345, "clip_ratio/region_mean": 0.01204759068787098, "entropy": 0.615642249584198, "epoch": 1.4931506849315068, "grad_norm": 1.2758079154814044, "kl": 0.16810132563114166, "learning_rate": 4.628424657534246e-07, "loss": -0.0188, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1530.0, "completions/max_terminated_length": 1530.0, "completions/mean_length": 464.14288330078125, "completions/mean_terminated_length": 464.14288330078125, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.5862627029418945, "epoch": 1.5, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.2924016164371905, "kl": 0.13205572962760925, "learning_rate": 4.6267123287671226e-07, "loss": 0.0054, "num_tokens": 4098601.0, "reward": 0.4755569398403168, "reward_std": 0.11342314630746841, "rewards/check_gptzero_func/mean": 0.4755569100379944, "rewards/check_gptzero_func/std": 0.318477600812912, "sampling/importance_sampling_ratio/max": 1.8357675075531006, "sampling/importance_sampling_ratio/mean": 1.0002634525299072, "sampling/importance_sampling_ratio/min": 0.40722203254699707, "sampling/sampling_logp_difference/max": 0.8983967304229736, "sampling/sampling_logp_difference/mean": 0.021522782742977142, "step": 219 }, { "clip_ratio/high_max": 0.008807247504591942, "clip_ratio/high_mean": 0.006174187641590834, "clip_ratio/low_mean": 0.004799947142601013, "clip_ratio/low_min": 0.0031976124737411737, "clip_ratio/region_mean": 0.01097413431853056, "entropy": 0.5838390588760376, "epoch": 1.5068493150684932, "grad_norm": 1.2375036888522732, "kl": 0.1420675814151764, "learning_rate": 4.625e-07, "loss": 0.0004, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 468.8035888671875, "completions/mean_terminated_length": 468.8035888671875, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.5334023833274841, "epoch": 1.5136986301369864, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.341887984323037, "kl": 0.13835345208644867, "learning_rate": 4.6232876712328767e-07, "loss": -0.0032, "num_tokens": 4129660.0, "reward": 0.5552217960357666, "reward_std": 0.07600116729736328, "rewards/check_gptzero_func/mean": 0.5552217364311218, "rewards/check_gptzero_func/std": 0.2802400588989258, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996392130851746, "sampling/importance_sampling_ratio/min": 0.295183926820755, "sampling/sampling_logp_difference/max": 1.2201566696166992, "sampling/sampling_logp_difference/mean": 0.020448297262191772, "step": 221 }, { "clip_ratio/high_max": 0.007370474748313427, "clip_ratio/high_mean": 0.005937907844781876, "clip_ratio/low_mean": 0.0041716331616044044, "clip_ratio/low_min": 0.0022650056052953005, "clip_ratio/region_mean": 0.01010954100638628, "entropy": 0.5341101884841919, "epoch": 1.5205479452054793, "grad_norm": 1.1954034427273417, "kl": 0.13863913714885712, "learning_rate": 4.621575342465753e-07, "loss": -0.008, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 450.1607360839844, "completions/mean_terminated_length": 450.1607360839844, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.5273581743240356, "epoch": 1.5273972602739727, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3099073807490746, "kl": 0.15210020542144775, "learning_rate": 4.6198630136986297e-07, "loss": 0.0095, "num_tokens": 4159559.0, "reward": 0.4673656225204468, "reward_std": 0.06289339065551758, "rewards/check_gptzero_func/mean": 0.4673655927181244, "rewards/check_gptzero_func/std": 0.31528085470199585, "sampling/importance_sampling_ratio/max": 1.9693684577941895, "sampling/importance_sampling_ratio/mean": 0.999647319316864, "sampling/importance_sampling_ratio/min": 0.5692357420921326, "sampling/sampling_logp_difference/max": 0.6777129173278809, "sampling/sampling_logp_difference/mean": 0.02007289044559002, "step": 223 }, { "clip_ratio/high_max": 0.008287292905151844, "clip_ratio/high_mean": 0.006440014578402042, "clip_ratio/low_mean": 0.004661794751882553, "clip_ratio/low_min": 0.003376304404810071, "clip_ratio/region_mean": 0.01110181026160717, "entropy": 0.5289842486381531, "epoch": 1.5342465753424657, "grad_norm": 1.1968769518396247, "kl": 0.15208008885383606, "learning_rate": 4.6181506849315067e-07, "loss": 0.0041, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1917.0, "completions/max_terminated_length": 1917.0, "completions/mean_length": 540.1785888671875, "completions/mean_terminated_length": 540.1785888671875, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "entropy": 0.6254929900169373, "epoch": 1.541095890410959, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.208292762702841, "kl": 0.11688883602619171, "learning_rate": 4.616438356164383e-07, "loss": -0.013, "num_tokens": 4194743.0, "reward": 0.3873192369937897, "reward_std": 0.11311237514019012, "rewards/check_gptzero_func/mean": 0.3873192369937897, "rewards/check_gptzero_func/std": 0.33856165409088135, "sampling/importance_sampling_ratio/max": 1.7168973684310913, "sampling/importance_sampling_ratio/mean": 1.0006049871444702, "sampling/importance_sampling_ratio/min": 0.5511825084686279, "sampling/sampling_logp_difference/max": 0.5956892967224121, "sampling/sampling_logp_difference/mean": 0.021963896229863167, "step": 225 }, { "clip_ratio/high_max": 0.009150991216301918, "clip_ratio/high_mean": 0.006615838967263699, "clip_ratio/low_mean": 0.0031347444746643305, "clip_ratio/low_min": 0.0018450184725224972, "clip_ratio/region_mean": 0.009750583209097385, "entropy": 0.6265473365783691, "epoch": 1.547945205479452, "grad_norm": 1.092999985459118, "kl": 0.11603633314371109, "learning_rate": 4.61472602739726e-07, "loss": -0.0182, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1441.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 446.9464416503906, "completions/mean_terminated_length": 446.9464416503906, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.5290102958679199, "epoch": 1.5547945205479452, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3248070107756926, "kl": 0.16533255577087402, "learning_rate": 4.613013698630137e-07, "loss": -0.0158, "num_tokens": 4224720.0, "reward": 0.45488643646240234, "reward_std": 0.11816742271184921, "rewards/check_gptzero_func/mean": 0.45488640666007996, "rewards/check_gptzero_func/std": 0.31417179107666016, "sampling/importance_sampling_ratio/max": 1.7671537399291992, "sampling/importance_sampling_ratio/mean": 1.000232458114624, "sampling/importance_sampling_ratio/min": 0.3546125292778015, "sampling/sampling_logp_difference/max": 1.0367295742034912, "sampling/sampling_logp_difference/mean": 0.020527465268969536, "step": 227 }, { "clip_ratio/high_max": 0.007677542977035046, "clip_ratio/high_mean": 0.006303598638623953, "clip_ratio/low_mean": 0.0042644646018743515, "clip_ratio/low_min": 0.002761000767350197, "clip_ratio/region_mean": 0.010568062774837017, "entropy": 0.5291692018508911, "epoch": 1.5616438356164384, "grad_norm": 1.21047320233182, "kl": 0.1566748172044754, "learning_rate": 4.6113013698630133e-07, "loss": -0.0208, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 486.14288330078125, "completions/mean_terminated_length": 486.14288330078125, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.562491774559021, "epoch": 1.5684931506849316, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.2397723335315023, "kl": 0.16780677437782288, "learning_rate": 4.6095890410958903e-07, "loss": -0.0009, "num_tokens": 4256908.0, "reward": 0.48666438460350037, "reward_std": 0.09400968253612518, "rewards/check_gptzero_func/mean": 0.486664354801178, "rewards/check_gptzero_func/std": 0.33585935831069946, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0002000331878662, "sampling/importance_sampling_ratio/min": 0.5883741974830627, "sampling/sampling_logp_difference/max": 0.7051243782043457, "sampling/sampling_logp_difference/mean": 0.021514588966965675, "step": 229 }, { "clip_ratio/high_max": 0.010488245636224747, "clip_ratio/high_mean": 0.0072159795090556145, "clip_ratio/low_mean": 0.004513900727033615, "clip_ratio/low_min": 0.0025316455867141485, "clip_ratio/region_mean": 0.01172988023608923, "entropy": 0.5634901523590088, "epoch": 1.5753424657534247, "grad_norm": 1.136734929704533, "kl": 0.17659132182598114, "learning_rate": 4.607876712328767e-07, "loss": -0.006, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1266.0, "completions/max_terminated_length": 1266.0, "completions/mean_length": 494.83929443359375, "completions/mean_terminated_length": 494.83929443359375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.5407490134239197, "epoch": 1.5821917808219177, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2562499353699723, "kl": 0.16888800263404846, "learning_rate": 4.606164383561644e-07, "loss": 0.0044, "num_tokens": 4289493.0, "reward": 0.3836750090122223, "reward_std": 0.11266989260911942, "rewards/check_gptzero_func/mean": 0.3836750090122223, "rewards/check_gptzero_func/std": 0.29069504141807556, "sampling/importance_sampling_ratio/max": 1.780730128288269, "sampling/importance_sampling_ratio/mean": 1.0001459121704102, "sampling/importance_sampling_ratio/min": 0.5243507027626038, "sampling/sampling_logp_difference/max": 0.6455944776535034, "sampling/sampling_logp_difference/mean": 0.021006053313612938, "step": 231 }, { "clip_ratio/high_max": 0.010299625806510448, "clip_ratio/high_mean": 0.006574638187885284, "clip_ratio/low_mean": 0.003786968532949686, "clip_ratio/low_min": 0.0027152830734848976, "clip_ratio/region_mean": 0.010361607186496258, "entropy": 0.5407490134239197, "epoch": 1.589041095890411, "grad_norm": 1.5190254754700458, "kl": 0.2033708393573761, "learning_rate": 4.6044520547945204e-07, "loss": -0.0006, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 495.96429443359375, "completions/mean_terminated_length": 495.96429443359375, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.5240076780319214, "epoch": 1.595890410958904, "frac_reward_zero_std": 0.0, "grad_norm": 1.258365337831987, "kl": 0.16168098151683807, "learning_rate": 4.602739726027397e-07, "loss": -0.0035, "num_tokens": 4321953.0, "reward": 0.39599645137786865, "reward_std": 0.10491066426038742, "rewards/check_gptzero_func/mean": 0.39599642157554626, "rewards/check_gptzero_func/std": 0.2332981675863266, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0002243518829346, "sampling/importance_sampling_ratio/min": 0.0660075843334198, "sampling/sampling_logp_difference/max": 2.7179856300354004, "sampling/sampling_logp_difference/mean": 0.02043316327035427, "step": 233 }, { "clip_ratio/high_max": 0.010673985816538334, "clip_ratio/high_mean": 0.00789667759090662, "clip_ratio/low_mean": 0.004032228607684374, "clip_ratio/low_min": 0.0021307982970029116, "clip_ratio/region_mean": 0.011928906664252281, "entropy": 0.525996744632721, "epoch": 1.6027397260273972, "grad_norm": 1.1863403383606081, "kl": 0.14803798496723175, "learning_rate": 4.601027397260274e-07, "loss": -0.0088, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 540.75, "completions/mean_terminated_length": 540.75, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.6620880961418152, "epoch": 1.6095890410958904, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.2199684127927009, "kl": 0.12126902490854263, "learning_rate": 4.5993150684931504e-07, "loss": 0.004, "num_tokens": 4356521.0, "reward": 0.39257845282554626, "reward_std": 0.11403729766607285, "rewards/check_gptzero_func/mean": 0.3925784230232239, "rewards/check_gptzero_func/std": 0.30309951305389404, "sampling/importance_sampling_ratio/max": 1.7850239276885986, "sampling/importance_sampling_ratio/mean": 1.000106692314148, "sampling/importance_sampling_ratio/min": 0.405701220035553, "sampling/sampling_logp_difference/max": 0.902138352394104, "sampling/sampling_logp_difference/mean": 0.022285467013716698, "step": 235 }, { "clip_ratio/high_max": 0.008106542751193047, "clip_ratio/high_mean": 0.00561151746660471, "clip_ratio/low_mean": 0.003495762823149562, "clip_ratio/low_min": 0.002072538947686553, "clip_ratio/region_mean": 0.00910728145390749, "entropy": 0.6625053286552429, "epoch": 1.6164383561643836, "grad_norm": 1.145310700754605, "kl": 0.12301778793334961, "learning_rate": 4.597602739726027e-07, "loss": -0.0009, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1170.0, "completions/max_terminated_length": 1170.0, "completions/mean_length": 416.89288330078125, "completions/mean_terminated_length": 416.89288330078125, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.5454555749893188, "epoch": 1.6232876712328768, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4157752699905906, "kl": 0.13857616484165192, "learning_rate": 4.595890410958904e-07, "loss": 0.0397, "num_tokens": 4385123.0, "reward": 0.38863280415534973, "reward_std": 0.06469641625881195, "rewards/check_gptzero_func/mean": 0.38863280415534973, "rewards/check_gptzero_func/std": 0.280761182308197, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996526837348938, "sampling/importance_sampling_ratio/min": 0.5202401280403137, "sampling/sampling_logp_difference/max": 0.8956842422485352, "sampling/sampling_logp_difference/mean": 0.020503003150224686, "step": 237 }, { "clip_ratio/high_max": 0.009081735275685787, "clip_ratio/high_mean": 0.007501490879803896, "clip_ratio/low_mean": 0.006469167768955231, "clip_ratio/low_min": 0.0025731746573001146, "clip_ratio/region_mean": 0.01397065818309784, "entropy": 0.542076051235199, "epoch": 1.6301369863013697, "grad_norm": 1.2908907617039398, "kl": 0.14571616053581238, "learning_rate": 4.594178082191781e-07, "loss": 0.0343, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1025.0, "completions/max_terminated_length": 1025.0, "completions/mean_length": 361.2321472167969, "completions/mean_terminated_length": 361.2321472167969, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.5276197791099548, "epoch": 1.6369863013698631, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.454234567880221, "kl": 0.1556556224822998, "learning_rate": 4.5924657534246575e-07, "loss": 0.0119, "num_tokens": 4410158.0, "reward": 0.5356959104537964, "reward_std": 0.11558336019515991, "rewards/check_gptzero_func/mean": 0.5356959104537964, "rewards/check_gptzero_func/std": 0.36207321286201477, "sampling/importance_sampling_ratio/max": 1.853845477104187, "sampling/importance_sampling_ratio/mean": 0.9999656081199646, "sampling/importance_sampling_ratio/min": 0.31340962648391724, "sampling/sampling_logp_difference/max": 1.1602442264556885, "sampling/sampling_logp_difference/mean": 0.02007870376110077, "step": 239 }, { "clip_ratio/high_max": 0.010150963440537453, "clip_ratio/high_mean": 0.007968844845890999, "clip_ratio/low_mean": 0.004048611037433147, "clip_ratio/low_min": 0.0017084282590076327, "clip_ratio/region_mean": 0.012017455883324146, "entropy": 0.5271391868591309, "epoch": 1.643835616438356, "grad_norm": 4.290109342695046, "kl": 0.25641682744026184, "learning_rate": 4.590753424657534e-07, "loss": 0.0066, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1965.0, "completions/max_terminated_length": 1965.0, "completions/mean_length": 497.0357360839844, "completions/mean_terminated_length": 497.0357360839844, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "entropy": 0.5510287880897522, "epoch": 1.6506849315068495, "frac_reward_zero_std": 0.0, "grad_norm": 1.26281429485491, "kl": 0.15447402000427246, "learning_rate": 4.589041095890411e-07, "loss": -0.0115, "num_tokens": 4443134.0, "reward": 0.4369261860847473, "reward_std": 0.1099676713347435, "rewards/check_gptzero_func/mean": 0.43692612648010254, "rewards/check_gptzero_func/std": 0.24742093682289124, "sampling/importance_sampling_ratio/max": 1.6029040813446045, "sampling/importance_sampling_ratio/mean": 1.0005286931991577, "sampling/importance_sampling_ratio/min": 0.43293964862823486, "sampling/sampling_logp_difference/max": 0.8371570110321045, "sampling/sampling_logp_difference/mean": 0.021036647260189056, "step": 241 }, { "clip_ratio/high_max": 0.0068807341158390045, "clip_ratio/high_mean": 0.005199361592531204, "clip_ratio/low_mean": 0.005476776976138353, "clip_ratio/low_min": 0.003511236049234867, "clip_ratio/region_mean": 0.01067613810300827, "entropy": 0.550046980381012, "epoch": 1.6575342465753424, "grad_norm": 1.2151478011443186, "kl": 0.15487073361873627, "learning_rate": 4.5873287671232875e-07, "loss": -0.0166, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1521.0, "completions/max_terminated_length": 1521.0, "completions/mean_length": 401.01788330078125, "completions/mean_terminated_length": 401.01788330078125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.50827556848526, "epoch": 1.6643835616438356, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4061978317639554, "kl": 0.1899154931306839, "learning_rate": 4.585616438356164e-07, "loss": 0.0177, "num_tokens": 4470699.0, "reward": 0.4572173058986664, "reward_std": 0.0742298811674118, "rewards/check_gptzero_func/mean": 0.457217276096344, "rewards/check_gptzero_func/std": 0.29416367411613464, "sampling/importance_sampling_ratio/max": 1.9693646430969238, "sampling/importance_sampling_ratio/mean": 0.9999375939369202, "sampling/importance_sampling_ratio/min": 0.5450739860534668, "sampling/sampling_logp_difference/max": 0.677711009979248, "sampling/sampling_logp_difference/mean": 0.02015872858464718, "step": 243 }, { "clip_ratio/high_max": 0.010288066230714321, "clip_ratio/high_mean": 0.007806879002600908, "clip_ratio/low_mean": 0.005508380476385355, "clip_ratio/low_min": 0.0033818059600889683, "clip_ratio/region_mean": 0.013315259478986263, "entropy": 0.506721556186676, "epoch": 1.6712328767123288, "grad_norm": 1.2316422737262664, "kl": 0.18693529069423676, "learning_rate": 4.5839041095890405e-07, "loss": 0.0119, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1000.0, "completions/max_terminated_length": 1000.0, "completions/mean_length": 433.46429443359375, "completions/mean_terminated_length": 433.46429443359375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.5715472102165222, "epoch": 1.678082191780822, "frac_reward_zero_std": 0.0, "grad_norm": 1.3558103865883249, "kl": 0.16765883564949036, "learning_rate": 4.5821917808219176e-07, "loss": 0.0138, "num_tokens": 4499749.0, "reward": 0.43134409189224243, "reward_std": 0.13629339635372162, "rewards/check_gptzero_func/mean": 0.43134406208992004, "rewards/check_gptzero_func/std": 0.29217517375946045, "sampling/importance_sampling_ratio/max": 1.8349946737289429, "sampling/importance_sampling_ratio/mean": 0.9999254941940308, "sampling/importance_sampling_ratio/min": 0.5038025379180908, "sampling/sampling_logp_difference/max": 0.6855709552764893, "sampling/sampling_logp_difference/mean": 0.021262342110276222, "step": 245 }, { "clip_ratio/high_max": 0.00883946381509304, "clip_ratio/high_mean": 0.006507223006337881, "clip_ratio/low_mean": 0.004619404207915068, "clip_ratio/low_min": 0.0018604651559144258, "clip_ratio/region_mean": 0.011126627214252949, "entropy": 0.5708103775978088, "epoch": 1.6849315068493151, "grad_norm": 1.2180002723309642, "kl": 0.1635671705007553, "learning_rate": 4.5804794520547946e-07, "loss": 0.0079, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 435.26788330078125, "completions/mean_terminated_length": 435.26788330078125, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "entropy": 0.5525324940681458, "epoch": 1.691780821917808, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7498409066758023, "kl": 0.1955144852399826, "learning_rate": 4.578767123287671e-07, "loss": 0.0019, "num_tokens": 4529016.0, "reward": 0.5028177499771118, "reward_std": 0.08285757899284363, "rewards/check_gptzero_func/mean": 0.5028177499771118, "rewards/check_gptzero_func/std": 0.27123120427131653, "sampling/importance_sampling_ratio/max": 1.6609089374542236, "sampling/importance_sampling_ratio/mean": 0.9999098181724548, "sampling/importance_sampling_ratio/min": 0.5487873554229736, "sampling/sampling_logp_difference/max": 0.6000442504882812, "sampling/sampling_logp_difference/mean": 0.021381055936217308, "step": 247 }, { "clip_ratio/high_max": 0.00823170691728592, "clip_ratio/high_mean": 0.006929656025022268, "clip_ratio/low_mean": 0.004722109530121088, "clip_ratio/low_min": 0.0029850746504962444, "clip_ratio/region_mean": 0.011651766486465931, "entropy": 0.5536441206932068, "epoch": 1.6986301369863015, "grad_norm": 1.357451739556453, "kl": 0.1904478818178177, "learning_rate": 4.577054794520548e-07, "loss": -0.0034, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 365.7321472167969, "completions/mean_terminated_length": 365.7321472167969, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.5424038171768188, "epoch": 1.7054794520547945, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4586629683506551, "kl": 0.2055169641971588, "learning_rate": 4.5753424657534246e-07, "loss": -0.0013, "num_tokens": 4554441.0, "reward": 0.5013014078140259, "reward_std": 0.04904240742325783, "rewards/check_gptzero_func/mean": 0.5013013482093811, "rewards/check_gptzero_func/std": 0.3352563679218292, "sampling/importance_sampling_ratio/max": 1.8179765939712524, "sampling/importance_sampling_ratio/mean": 0.9996239542961121, "sampling/importance_sampling_ratio/min": 0.5254772901535034, "sampling/sampling_logp_difference/max": 0.6434483528137207, "sampling/sampling_logp_difference/mean": 0.021054815500974655, "step": 249 }, { "clip_ratio/high_max": 0.010890280827879906, "clip_ratio/high_mean": 0.008218762464821339, "clip_ratio/low_mean": 0.0047641838900744915, "clip_ratio/low_min": 0.0012709710281342268, "clip_ratio/region_mean": 0.012982946820557117, "entropy": 0.5414916276931763, "epoch": 1.7123287671232876, "grad_norm": 1.2974184207641513, "kl": 0.21024294197559357, "learning_rate": 4.573630136986301e-07, "loss": -0.0071, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 876.0, "completions/max_terminated_length": 876.0, "completions/mean_length": 304.2857360839844, "completions/mean_terminated_length": 304.2857360839844, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.4613683521747589, "epoch": 1.7191780821917808, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.5986324289564953, "kl": 0.20930106937885284, "learning_rate": 4.5719178082191776e-07, "loss": 0.0055, "num_tokens": 4576231.0, "reward": 0.5388298630714417, "reward_std": 0.08991739153862, "rewards/check_gptzero_func/mean": 0.5388298630714417, "rewards/check_gptzero_func/std": 0.36090072989463806, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9993210434913635, "sampling/importance_sampling_ratio/min": 0.549371600151062, "sampling/sampling_logp_difference/max": 0.9743642807006836, "sampling/sampling_logp_difference/mean": 0.018723130226135254, "step": 251 }, { "clip_ratio/high_max": 0.011182108893990517, "clip_ratio/high_mean": 0.009020787663757801, "clip_ratio/low_mean": 0.00678015174344182, "clip_ratio/low_min": 0.0037833531387150288, "clip_ratio/region_mean": 0.01580093801021576, "entropy": 0.46203964948654175, "epoch": 1.726027397260274, "grad_norm": 1.4411174449300816, "kl": 0.21307523548603058, "learning_rate": 4.5702054794520547e-07, "loss": -0.0011, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1264.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 390.4285888671875, "completions/mean_terminated_length": 390.4285888671875, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.5033656358718872, "epoch": 1.7328767123287672, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.432075540199294, "kl": 0.18004854023456573, "learning_rate": 4.568493150684931e-07, "loss": -0.0341, "num_tokens": 4603099.0, "reward": 0.514867901802063, "reward_std": 0.09030124545097351, "rewards/check_gptzero_func/mean": 0.514867901802063, "rewards/check_gptzero_func/std": 0.31742775440216064, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0002278089523315, "sampling/importance_sampling_ratio/min": 0.5124419331550598, "sampling/sampling_logp_difference/max": 0.7835935354232788, "sampling/sampling_logp_difference/mean": 0.019643882289528847, "step": 253 }, { "clip_ratio/high_max": 0.00978747196495533, "clip_ratio/high_mean": 0.007849198766052723, "clip_ratio/low_mean": 0.005822158884257078, "clip_ratio/low_min": 0.00390625, "clip_ratio/region_mean": 0.013671358115971088, "entropy": 0.5041753649711609, "epoch": 1.7397260273972601, "grad_norm": 1.2722393245233241, "kl": 0.175362229347229, "learning_rate": 4.5667808219178077e-07, "loss": -0.04, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1792.0, "completions/max_terminated_length": 1792.0, "completions/mean_length": 605.732177734375, "completions/mean_terminated_length": 605.732177734375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.6193663477897644, "epoch": 1.7465753424657535, "frac_reward_zero_std": 0.0, "grad_norm": 1.1644504283289203, "kl": 0.1429295539855957, "learning_rate": 4.565068493150685e-07, "loss": 0.0081, "num_tokens": 4641976.0, "reward": 0.45003095269203186, "reward_std": 0.09220973402261734, "rewards/check_gptzero_func/mean": 0.45003095269203186, "rewards/check_gptzero_func/std": 0.30337801575660706, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.99997478723526, "sampling/importance_sampling_ratio/min": 0.27278798818588257, "sampling/sampling_logp_difference/max": 1.2990604639053345, "sampling/sampling_logp_difference/mean": 0.02262319065630436, "step": 255 }, { "clip_ratio/high_max": 0.010638297535479069, "clip_ratio/high_mean": 0.007456501480191946, "clip_ratio/low_mean": 0.00413839565590024, "clip_ratio/low_min": 0.0026776150334626436, "clip_ratio/region_mean": 0.011594897136092186, "entropy": 0.6207238435745239, "epoch": 1.7534246575342465, "grad_norm": 1.053181717403372, "kl": 0.14285646378993988, "learning_rate": 4.563356164383562e-07, "loss": 0.0027, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1605.0, "completions/max_terminated_length": 1605.0, "completions/mean_length": 599.0535888671875, "completions/mean_terminated_length": 599.0535888671875, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.5931372046470642, "epoch": 1.7602739726027399, "frac_reward_zero_std": 0.0, "grad_norm": 1.1623880604060597, "kl": 0.15606553852558136, "learning_rate": 4.561643835616438e-07, "loss": 0.0123, "num_tokens": 4680509.0, "reward": 0.4221056401729584, "reward_std": 0.12989889085292816, "rewards/check_gptzero_func/mean": 0.422105610370636, "rewards/check_gptzero_func/std": 0.23863381147384644, "sampling/importance_sampling_ratio/max": 1.907719612121582, "sampling/importance_sampling_ratio/mean": 1.00002121925354, "sampling/importance_sampling_ratio/min": 0.2737833857536316, "sampling/sampling_logp_difference/max": 1.2954180240631104, "sampling/sampling_logp_difference/mean": 0.021910954266786575, "step": 257 }, { "clip_ratio/high_max": 0.00937366858124733, "clip_ratio/high_mean": 0.005866996943950653, "clip_ratio/low_mean": 0.0034259133972227573, "clip_ratio/low_min": 0.001896224799565971, "clip_ratio/region_mean": 0.009292910806834698, "entropy": 0.5939449667930603, "epoch": 1.7671232876712328, "grad_norm": 1.0567261170903635, "kl": 0.1513364315032959, "learning_rate": 4.559931506849315e-07, "loss": 0.0071, "step": 258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 436.0714416503906, "completions/mean_terminated_length": 436.0714416503906, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.5111492872238159, "epoch": 1.773972602739726, "frac_reward_zero_std": 0.0, "grad_norm": 1.3927911927869596, "kl": 0.18839001655578613, "learning_rate": 4.558219178082192e-07, "loss": -0.0002, "num_tokens": 4709545.0, "reward": 0.4922366440296173, "reward_std": 0.1045655757188797, "rewards/check_gptzero_func/mean": 0.4922366142272949, "rewards/check_gptzero_func/std": 0.264097660779953, "sampling/importance_sampling_ratio/max": 1.9086421728134155, "sampling/importance_sampling_ratio/mean": 1.0002707242965698, "sampling/importance_sampling_ratio/min": 0.6140350103378296, "sampling/sampling_logp_difference/max": 0.6463921070098877, "sampling/sampling_logp_difference/mean": 0.020477555692195892, "step": 259 }, { "clip_ratio/high_max": 0.011936339549720287, "clip_ratio/high_mean": 0.007593695539981127, "clip_ratio/low_mean": 0.005099813919514418, "clip_ratio/low_min": 0.0035891772713512182, "clip_ratio/region_mean": 0.012693509459495544, "entropy": 0.5097781419754028, "epoch": 1.7808219178082192, "grad_norm": 3.2857618703445244, "kl": 0.29209181666374207, "learning_rate": 4.5565068493150683e-07, "loss": -0.0049, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1577.0, "completions/max_terminated_length": 1577.0, "completions/mean_length": 538.1964721679688, "completions/mean_terminated_length": 538.1964721679688, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.5293021202087402, "epoch": 1.7876712328767124, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.8338962423806062, "kl": 0.208663672208786, "learning_rate": 4.554794520547945e-07, "loss": 0.0141, "num_tokens": 4744514.0, "reward": 0.5354284644126892, "reward_std": 0.09552036225795746, "rewards/check_gptzero_func/mean": 0.5354284048080444, "rewards/check_gptzero_func/std": 0.2904328405857086, "sampling/importance_sampling_ratio/max": 1.4877688884735107, "sampling/importance_sampling_ratio/mean": 1.0000369548797607, "sampling/importance_sampling_ratio/min": 0.5729489326477051, "sampling/sampling_logp_difference/max": 0.5569586753845215, "sampling/sampling_logp_difference/mean": 0.019817668944597244, "step": 261 }, { "clip_ratio/high_max": 0.009453349746763706, "clip_ratio/high_mean": 0.006643810775130987, "clip_ratio/low_mean": 0.0036454126238822937, "clip_ratio/low_min": 0.0024449878837913275, "clip_ratio/region_mean": 0.010289222933351994, "entropy": 0.5322325825691223, "epoch": 1.7945205479452055, "grad_norm": 1.2474918921836768, "kl": 0.1436353623867035, "learning_rate": 4.5530821917808213e-07, "loss": 0.0097, "step": 262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1843.0, "completions/max_terminated_length": 1843.0, "completions/mean_length": 547.482177734375, "completions/mean_terminated_length": 547.482177734375, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "entropy": 0.5437929034233093, "epoch": 1.8013698630136985, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.553834993493882, "kl": 0.3718782067298889, "learning_rate": 4.5513698630136983e-07, "loss": 0.0038, "num_tokens": 4780039.0, "reward": 0.41317614912986755, "reward_std": 0.10180643945932388, "rewards/check_gptzero_func/mean": 0.41317611932754517, "rewards/check_gptzero_func/std": 0.2863292396068573, "sampling/importance_sampling_ratio/max": 1.6488419771194458, "sampling/importance_sampling_ratio/mean": 1.0000118017196655, "sampling/importance_sampling_ratio/min": 0.509735107421875, "sampling/sampling_logp_difference/max": 0.6738641262054443, "sampling/sampling_logp_difference/mean": 0.020194271579384804, "step": 263 }, { "clip_ratio/high_max": 0.0055716512724757195, "clip_ratio/high_mean": 0.0046202209778130054, "clip_ratio/low_mean": 0.002926160814240575, "clip_ratio/low_min": 0.0016402405453845859, "clip_ratio/region_mean": 0.007546381093561649, "entropy": 0.5463412404060364, "epoch": 1.808219178082192, "grad_norm": 1.182215559215266, "kl": 0.17617174983024597, "learning_rate": 4.5496575342465754e-07, "loss": -0.0007, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 525.2678833007812, "completions/mean_terminated_length": 525.2678833007812, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.5676150918006897, "epoch": 1.8150684931506849, "frac_reward_zero_std": 0.0, "grad_norm": 1.1905523650463767, "kl": 0.16940809786319733, "learning_rate": 4.547945205479452e-07, "loss": -0.0256, "num_tokens": 4814466.0, "reward": 0.47984617948532104, "reward_std": 0.0727236196398735, "rewards/check_gptzero_func/mean": 0.47984617948532104, "rewards/check_gptzero_func/std": 0.2827204167842865, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0001434087753296, "sampling/importance_sampling_ratio/min": 0.6059483289718628, "sampling/sampling_logp_difference/max": 0.7487163543701172, "sampling/sampling_logp_difference/mean": 0.021022392436861992, "step": 265 }, { "clip_ratio/high_max": 0.010428736917674541, "clip_ratio/high_mean": 0.007184578571468592, "clip_ratio/low_mean": 0.004042389802634716, "clip_ratio/low_min": 0.0026978417299687862, "clip_ratio/region_mean": 0.011226966977119446, "entropy": 0.5671510100364685, "epoch": 1.821917808219178, "grad_norm": 1.0776598063236245, "kl": 0.16973233222961426, "learning_rate": 4.546232876712329e-07, "loss": -0.0305, "step": 266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1152.0, "completions/max_terminated_length": 1152.0, "completions/mean_length": 474.6964416503906, "completions/mean_terminated_length": 474.6964416503906, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.5352806448936462, "epoch": 1.8287671232876712, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2758953914465883, "kl": 0.16730961203575134, "learning_rate": 4.5445205479452054e-07, "loss": 0.0148, "num_tokens": 4845949.0, "reward": 0.5153707265853882, "reward_std": 0.09347327798604965, "rewards/check_gptzero_func/mean": 0.5153707265853882, "rewards/check_gptzero_func/std": 0.3430137038230896, "sampling/importance_sampling_ratio/max": 1.7013431787490845, "sampling/importance_sampling_ratio/mean": 1.0003290176391602, "sampling/importance_sampling_ratio/min": 0.29503026604652405, "sampling/sampling_logp_difference/max": 1.2206772565841675, "sampling/sampling_logp_difference/mean": 0.020429430529475212, "step": 267 }, { "clip_ratio/high_max": 0.009719840250909328, "clip_ratio/high_mean": 0.007400344591587782, "clip_ratio/low_mean": 0.005905883852392435, "clip_ratio/low_min": 0.003514644457027316, "clip_ratio/region_mean": 0.013306227512657642, "entropy": 0.5332766771316528, "epoch": 1.8356164383561644, "grad_norm": 1.190231671501567, "kl": 0.17249591648578644, "learning_rate": 4.542808219178082e-07, "loss": 0.009, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 392.9464416503906, "completions/mean_terminated_length": 392.9464416503906, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5165501832962036, "epoch": 1.8424657534246576, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4876952028252508, "kl": 0.21769985556602478, "learning_rate": 4.5410958904109584e-07, "loss": -0.0111, "num_tokens": 4872754.0, "reward": 0.4698053002357483, "reward_std": 0.1128959208726883, "rewards/check_gptzero_func/mean": 0.4698052704334259, "rewards/check_gptzero_func/std": 0.344531387090683, "sampling/importance_sampling_ratio/max": 1.9693636894226074, "sampling/importance_sampling_ratio/mean": 1.0000394582748413, "sampling/importance_sampling_ratio/min": 0.5228946208953857, "sampling/sampling_logp_difference/max": 0.6777105331420898, "sampling/sampling_logp_difference/mean": 0.020351167768239975, "step": 269 }, { "clip_ratio/high_max": 0.011848341673612595, "clip_ratio/high_mean": 0.009339476004242897, "clip_ratio/low_mean": 0.005352491978555918, "clip_ratio/low_min": 0.0037735849618911743, "clip_ratio/region_mean": 0.014691968448460102, "entropy": 0.5175048112869263, "epoch": 1.8493150684931505, "grad_norm": 1.2836358101103043, "kl": 0.21209128201007843, "learning_rate": 4.5393835616438354e-07, "loss": -0.0171, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1138.0, "completions/max_terminated_length": 1138.0, "completions/mean_length": 422.3571472167969, "completions/mean_terminated_length": 422.3571472167969, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.5166867971420288, "epoch": 1.856164383561644, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3734784271637364, "kl": 0.1810331642627716, "learning_rate": 4.537671232876712e-07, "loss": 0.0048, "num_tokens": 4901216.0, "reward": 0.4876410961151123, "reward_std": 0.04964882880449295, "rewards/check_gptzero_func/mean": 0.4876410663127899, "rewards/check_gptzero_func/std": 0.3094322681427002, "sampling/importance_sampling_ratio/max": 1.6488171815872192, "sampling/importance_sampling_ratio/mean": 1.0001248121261597, "sampling/importance_sampling_ratio/min": 0.282277911901474, "sampling/sampling_logp_difference/max": 1.264863133430481, "sampling/sampling_logp_difference/mean": 0.02102573961019516, "step": 271 }, { "clip_ratio/high_max": 0.012783265672624111, "clip_ratio/high_mean": 0.008751669898629189, "clip_ratio/low_mean": 0.005196419078856707, "clip_ratio/low_min": 0.003312355140224099, "clip_ratio/region_mean": 0.013948089443147182, "entropy": 0.5183298587799072, "epoch": 1.8630136986301369, "grad_norm": 1.230014464197315, "kl": 0.18157216906547546, "learning_rate": 4.5359589041095884e-07, "loss": -0.0011, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1260.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 361.89288330078125, "completions/mean_terminated_length": 361.89288330078125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.43912723660469055, "epoch": 1.8698630136986303, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4803076644858433, "kl": 0.24585311114788055, "learning_rate": 4.534246575342466e-07, "loss": 0.0109, "num_tokens": 4926388.0, "reward": 0.5666877031326294, "reward_std": 0.10071952641010284, "rewards/check_gptzero_func/mean": 0.5666877031326294, "rewards/check_gptzero_func/std": 0.2772006392478943, "sampling/importance_sampling_ratio/max": 1.7144050598144531, "sampling/importance_sampling_ratio/mean": 1.0002951622009277, "sampling/importance_sampling_ratio/min": 0.5537850856781006, "sampling/sampling_logp_difference/max": 0.5909786224365234, "sampling/sampling_logp_difference/mean": 0.01819380186498165, "step": 273 }, { "clip_ratio/high_max": 0.014370244927704334, "clip_ratio/high_mean": 0.009365306235849857, "clip_ratio/low_mean": 0.006751033943146467, "clip_ratio/low_min": 0.004454343114048243, "clip_ratio/region_mean": 0.016116339713335037, "entropy": 0.4386621415615082, "epoch": 1.8767123287671232, "grad_norm": 1.3927456781704974, "kl": 0.2626037895679474, "learning_rate": 4.5325342465753425e-07, "loss": 0.0046, "step": 274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 452.2857360839844, "completions/mean_terminated_length": 452.2857360839844, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.5233879089355469, "epoch": 1.8835616438356164, "frac_reward_zero_std": 0.0, "grad_norm": 1.3210278214131486, "kl": 0.20293307304382324, "learning_rate": 4.530821917808219e-07, "loss": -0.024, "num_tokens": 4956654.0, "reward": 0.5127921104431152, "reward_std": 0.12284902483224869, "rewards/check_gptzero_func/mean": 0.5127921104431152, "rewards/check_gptzero_func/std": 0.3003963232040405, "sampling/importance_sampling_ratio/max": 1.5519204139709473, "sampling/importance_sampling_ratio/mean": 0.9998078346252441, "sampling/importance_sampling_ratio/min": 0.5254126191139221, "sampling/sampling_logp_difference/max": 0.6435713768005371, "sampling/sampling_logp_difference/mean": 0.020178068429231644, "step": 275 }, { "clip_ratio/high_max": 0.00888373889029026, "clip_ratio/high_mean": 0.006909410934895277, "clip_ratio/low_mean": 0.005390775389969349, "clip_ratio/low_min": 0.00440960330888629, "clip_ratio/region_mean": 0.012300186790525913, "entropy": 0.5224551558494568, "epoch": 1.8904109589041096, "grad_norm": 1.1605821666407616, "kl": 0.2022383064031601, "learning_rate": 4.5291095890410955e-07, "loss": -0.03, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 489.8214416503906, "completions/mean_terminated_length": 489.8214416503906, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.5719665884971619, "epoch": 1.8972602739726028, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3872257152047092, "kl": 0.1637299358844757, "learning_rate": 4.5273972602739726e-07, "loss": 0.0191, "num_tokens": 4988790.0, "reward": 0.46473661065101624, "reward_std": 0.0836201012134552, "rewards/check_gptzero_func/mean": 0.46473661065101624, "rewards/check_gptzero_func/std": 0.3209838569164276, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9997662901878357, "sampling/importance_sampling_ratio/min": 0.4852724075317383, "sampling/sampling_logp_difference/max": 0.7230448722839355, "sampling/sampling_logp_difference/mean": 0.02172783575952053, "step": 277 }, { "clip_ratio/high_max": 0.009334889240562916, "clip_ratio/high_mean": 0.006211345549672842, "clip_ratio/low_mean": 0.0054992507211863995, "clip_ratio/low_min": 0.0017566974274814129, "clip_ratio/region_mean": 0.011710597202181816, "entropy": 0.569563090801239, "epoch": 1.904109589041096, "grad_norm": 1.538777037917703, "kl": 0.16554544866085052, "learning_rate": 4.525684931506849e-07, "loss": 0.0131, "step": 278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 422.5714416503906, "completions/mean_terminated_length": 422.5714416503906, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.548733651638031, "epoch": 1.910958904109589, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.4056289683684908, "kl": 0.17054036259651184, "learning_rate": 4.5239726027397256e-07, "loss": -0.0009, "num_tokens": 5017118.0, "reward": 0.5001675486564636, "reward_std": 0.11519249528646469, "rewards/check_gptzero_func/mean": 0.5001674890518188, "rewards/check_gptzero_func/std": 0.3205961287021637, "sampling/importance_sampling_ratio/max": 1.523703694343567, "sampling/importance_sampling_ratio/mean": 0.9999436140060425, "sampling/importance_sampling_ratio/min": 0.5668460726737976, "sampling/sampling_logp_difference/max": 0.5676674842834473, "sampling/sampling_logp_difference/mean": 0.020307078957557678, "step": 279 }, { "clip_ratio/high_max": 0.010907003656029701, "clip_ratio/high_mean": 0.007651847787201405, "clip_ratio/low_mean": 0.004846721887588501, "clip_ratio/low_min": 0.002385211642831564, "clip_ratio/region_mean": 0.01249857060611248, "entropy": 0.550762951374054, "epoch": 1.9178082191780823, "grad_norm": 2.9239563455486337, "kl": 0.19452139735221863, "learning_rate": 4.5222602739726026e-07, "loss": -0.0068, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2151.0, "completions/max_terminated_length": 2151.0, "completions/mean_length": 584.5535888671875, "completions/mean_terminated_length": 584.5535888671875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.6184817552566528, "epoch": 1.9246575342465753, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.1801570559517738, "kl": 0.15195490419864655, "learning_rate": 4.520547945205479e-07, "loss": -0.0072, "num_tokens": 5054503.0, "reward": 0.5221143960952759, "reward_std": 0.0781073346734047, "rewards/check_gptzero_func/mean": 0.5221143960952759, "rewards/check_gptzero_func/std": 0.2897453308105469, "sampling/importance_sampling_ratio/max": 1.9378459453582764, "sampling/importance_sampling_ratio/mean": 1.0000813007354736, "sampling/importance_sampling_ratio/min": 0.6136391162872314, "sampling/sampling_logp_difference/max": 0.661577045917511, "sampling/sampling_logp_difference/mean": 0.021727705374360085, "step": 281 }, { "clip_ratio/high_max": 0.006753106601536274, "clip_ratio/high_mean": 0.005459864158183336, "clip_ratio/low_mean": 0.0038054361939430237, "clip_ratio/low_min": 0.0004618937673512846, "clip_ratio/region_mean": 0.009265298955142498, "entropy": 0.6180806159973145, "epoch": 1.9315068493150684, "grad_norm": 1.0658425696241047, "kl": 0.15125629305839539, "learning_rate": 4.518835616438356e-07, "loss": -0.0127, "step": 282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1506.0, "completions/max_terminated_length": 1506.0, "completions/mean_length": 482.5357360839844, "completions/mean_terminated_length": 482.5357360839844, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.5691046118736267, "epoch": 1.9383561643835616, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3476704968046347, "kl": 0.16383104026317596, "learning_rate": 4.5171232876712326e-07, "loss": -0.0208, "num_tokens": 5086713.0, "reward": 0.4358144998550415, "reward_std": 0.09828458726406097, "rewards/check_gptzero_func/mean": 0.4358144700527191, "rewards/check_gptzero_func/std": 0.3208392560482025, "sampling/importance_sampling_ratio/max": 1.811137318611145, "sampling/importance_sampling_ratio/mean": 1.000250220298767, "sampling/importance_sampling_ratio/min": 0.45822417736053467, "sampling/sampling_logp_difference/max": 0.7803966999053955, "sampling/sampling_logp_difference/mean": 0.02110559679567814, "step": 283 }, { "clip_ratio/high_max": 0.009233610704541206, "clip_ratio/high_mean": 0.007337605115026236, "clip_ratio/low_mean": 0.004256221000105143, "clip_ratio/low_min": 0.0028436018619686365, "clip_ratio/region_mean": 0.011593826115131378, "entropy": 0.5719698667526245, "epoch": 1.9452054794520548, "grad_norm": 1.1785010704930199, "kl": 0.15816164016723633, "learning_rate": 4.5154109589041097e-07, "loss": -0.0264, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1030.0, "completions/max_terminated_length": 1030.0, "completions/mean_length": 383.01788330078125, "completions/mean_terminated_length": 383.01788330078125, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.45141586661338806, "epoch": 1.952054794520548, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.4727262593121035, "kl": 0.2536100447177887, "learning_rate": 4.513698630136986e-07, "loss": 0.0021, "num_tokens": 5113228.0, "reward": 0.5931656956672668, "reward_std": 0.09964369982481003, "rewards/check_gptzero_func/mean": 0.5931656956672668, "rewards/check_gptzero_func/std": 0.2848498523235321, "sampling/importance_sampling_ratio/max": 1.8181557655334473, "sampling/importance_sampling_ratio/mean": 1.000103235244751, "sampling/importance_sampling_ratio/min": 0.6085143685340881, "sampling/sampling_logp_difference/max": 0.5978226661682129, "sampling/sampling_logp_difference/mean": 0.019446713849902153, "step": 285 }, { "clip_ratio/high_max": 0.013003901578485966, "clip_ratio/high_mean": 0.008975149132311344, "clip_ratio/low_mean": 0.006079849787056446, "clip_ratio/low_min": 0.0022081148345023394, "clip_ratio/region_mean": 0.01505499891936779, "entropy": 0.45294904708862305, "epoch": 1.958904109589041, "grad_norm": 1.310017825470282, "kl": 0.24961228668689728, "learning_rate": 4.5119863013698627e-07, "loss": -0.0035, "step": 286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1010.0, "completions/max_terminated_length": 1010.0, "completions/mean_length": 463.232177734375, "completions/mean_terminated_length": 463.232177734375, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.5875517129898071, "epoch": 1.9657534246575343, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.368115057594928, "kl": 0.23248417675495148, "learning_rate": 4.5102739726027397e-07, "loss": 0.0076, "num_tokens": 5143737.0, "reward": 0.537977397441864, "reward_std": 0.13914541900157928, "rewards/check_gptzero_func/mean": 0.5379773378372192, "rewards/check_gptzero_func/std": 0.27932825684547424, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0000163316726685, "sampling/importance_sampling_ratio/min": 0.470598042011261, "sampling/sampling_logp_difference/max": 0.9246463775634766, "sampling/sampling_logp_difference/mean": 0.022372247651219368, "step": 287 }, { "clip_ratio/high_max": 0.01075697224587202, "clip_ratio/high_mean": 0.007834697142243385, "clip_ratio/low_mean": 0.005082257091999054, "clip_ratio/low_min": 0.0029325513169169426, "clip_ratio/region_mean": 0.01291695423424244, "entropy": 0.5867612957954407, "epoch": 1.9726027397260273, "grad_norm": 1.3553040987282579, "kl": 0.23995132744312286, "learning_rate": 4.508561643835616e-07, "loss": 0.0018, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1567.0, "completions/max_terminated_length": 1567.0, "completions/mean_length": 425.76788330078125, "completions/mean_terminated_length": 425.76788330078125, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.48439592123031616, "epoch": 1.9794520547945207, "frac_reward_zero_std": 0.0, "grad_norm": 1.382886393761431, "kl": 0.23494450747966766, "learning_rate": 4.5068493150684927e-07, "loss": -0.0153, "num_tokens": 5172686.0, "reward": 0.5658457279205322, "reward_std": 0.09007210284471512, "rewards/check_gptzero_func/mean": 0.5658456683158875, "rewards/check_gptzero_func/std": 0.2941392660140991, "sampling/importance_sampling_ratio/max": 1.8139524459838867, "sampling/importance_sampling_ratio/mean": 1.0000836849212646, "sampling/importance_sampling_ratio/min": 0.3782794177532196, "sampling/sampling_logp_difference/max": 0.9721221923828125, "sampling/sampling_logp_difference/mean": 0.019122226163744926, "step": 289 }, { "clip_ratio/high_max": 0.01378192100673914, "clip_ratio/high_mean": 0.009554324671626091, "clip_ratio/low_mean": 0.006035731174051762, "clip_ratio/low_min": 0.00279999990016222, "clip_ratio/region_mean": 0.015590055845677853, "entropy": 0.48506519198417664, "epoch": 1.9863013698630136, "grad_norm": 1.1793814770105315, "kl": 0.23484954237937927, "learning_rate": 4.505136986301369e-07, "loss": -0.0212, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1837.0, "completions/max_terminated_length": 1837.0, "completions/mean_length": 469.2500305175781, "completions/mean_terminated_length": 469.2500305175781, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.5099441409111023, "epoch": 1.9931506849315068, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2734497821941686, "kl": 0.20655357837677002, "learning_rate": 4.503424657534247e-07, "loss": -0.0043, "num_tokens": 5204060.0, "reward": 0.4763372242450714, "reward_std": 0.08321836590766907, "rewards/check_gptzero_func/mean": 0.476337194442749, "rewards/check_gptzero_func/std": 0.27227115631103516, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0004249811172485, "sampling/importance_sampling_ratio/min": 0.3020566403865814, "sampling/sampling_logp_difference/max": 1.1971406936645508, "sampling/sampling_logp_difference/mean": 0.020308099687099457, "step": 291 }, { "clip_ratio/high_max": 0.010702340863645077, "clip_ratio/high_mean": 0.007274560630321503, "clip_ratio/low_mean": 0.005275948904454708, "clip_ratio/low_min": 0.0016736402176320553, "clip_ratio/region_mean": 0.01255050953477621, "entropy": 0.5090036988258362, "epoch": 2.0, "grad_norm": 1.177605960016176, "kl": 0.20845063030719757, "learning_rate": 4.5017123287671233e-07, "loss": -0.01, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1666.0, "completions/max_terminated_length": 1666.0, "completions/mean_length": 516.2678833007812, "completions/mean_terminated_length": 516.2678833007812, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.5691830515861511, "epoch": 2.006849315068493, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.262707935466531, "kl": 0.1664128601551056, "learning_rate": 4.5e-07, "loss": -0.0059, "num_tokens": 5237761.0, "reward": 0.5623289942741394, "reward_std": 0.09568648785352707, "rewards/check_gptzero_func/mean": 0.5623289942741394, "rewards/check_gptzero_func/std": 0.2743038237094879, "sampling/importance_sampling_ratio/max": 1.5920027494430542, "sampling/importance_sampling_ratio/mean": 1.0002144575119019, "sampling/importance_sampling_ratio/min": 0.2553112804889679, "sampling/sampling_logp_difference/max": 1.365271806716919, "sampling/sampling_logp_difference/mean": 0.0209431704133749, "step": 293 }, { "clip_ratio/high_max": 0.008652246557176113, "clip_ratio/high_mean": 0.007148959673941135, "clip_ratio/low_mean": 0.0051490068435668945, "clip_ratio/low_min": 0.003751931246370077, "clip_ratio/region_mean": 0.01229796651750803, "entropy": 0.5687645077705383, "epoch": 2.0136986301369864, "grad_norm": 1.1117883181556136, "kl": 0.1679745465517044, "learning_rate": 4.4982876712328763e-07, "loss": -0.0118, "step": 294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 440.5000305175781, "completions/mean_terminated_length": 440.5000305175781, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.5400620102882385, "epoch": 2.0205479452054793, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.2970875894881297, "kl": 0.1951117068529129, "learning_rate": 4.4965753424657533e-07, "loss": -0.014, "num_tokens": 5267593.0, "reward": 0.5096891522407532, "reward_std": 0.05138532817363739, "rewards/check_gptzero_func/mean": 0.5096891522407532, "rewards/check_gptzero_func/std": 0.2989237904548645, "sampling/importance_sampling_ratio/max": 1.6275849342346191, "sampling/importance_sampling_ratio/mean": 0.9998806715011597, "sampling/importance_sampling_ratio/min": 0.48965927958488464, "sampling/sampling_logp_difference/max": 0.714045524597168, "sampling/sampling_logp_difference/mean": 0.020893005654215813, "step": 295 }, { "clip_ratio/high_max": 0.010105093009769917, "clip_ratio/high_mean": 0.007819474674761295, "clip_ratio/low_mean": 0.004637745674699545, "clip_ratio/low_min": 0.0017674090340733528, "clip_ratio/region_mean": 0.012457220815122128, "entropy": 0.5400992631912231, "epoch": 2.0273972602739727, "grad_norm": 1.4737191924655055, "kl": 0.20026670396327972, "learning_rate": 4.49486301369863e-07, "loss": -0.0202, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1770.0, "completions/max_terminated_length": 1770.0, "completions/mean_length": 544.4642944335938, "completions/mean_terminated_length": 544.4642944335938, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5712693929672241, "epoch": 2.0342465753424657, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2013515263671872, "kl": 0.14922939240932465, "learning_rate": 4.4931506849315063e-07, "loss": -0.0071, "num_tokens": 5302799.0, "reward": 0.4049447476863861, "reward_std": 0.09331052750349045, "rewards/check_gptzero_func/mean": 0.4049447178840637, "rewards/check_gptzero_func/std": 0.31214722990989685, "sampling/importance_sampling_ratio/max": 1.5869836807250977, "sampling/importance_sampling_ratio/mean": 1.000033974647522, "sampling/importance_sampling_ratio/min": 0.627055287361145, "sampling/sampling_logp_difference/max": 0.4667205810546875, "sampling/sampling_logp_difference/mean": 0.021255651488900185, "step": 297 }, { "clip_ratio/high_max": 0.009644529782235622, "clip_ratio/high_mean": 0.007171308156102896, "clip_ratio/low_mean": 0.004662294406443834, "clip_ratio/low_min": 0.0027555800043046474, "clip_ratio/region_mean": 0.01183360256254673, "entropy": 0.5722337365150452, "epoch": 2.041095890410959, "grad_norm": 1.0936486832167578, "kl": 0.15139320492744446, "learning_rate": 4.4914383561643834e-07, "loss": -0.0127, "step": 298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1951.0, "completions/max_terminated_length": 1951.0, "completions/mean_length": 613.4107666015625, "completions/mean_terminated_length": 613.4107666015625, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "entropy": 0.5495310425758362, "epoch": 2.047945205479452, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.1659037632794753, "kl": 0.17676877975463867, "learning_rate": 4.48972602739726e-07, "loss": 0.0278, "num_tokens": 5342020.0, "reward": 0.48609521985054016, "reward_std": 0.09159456193447113, "rewards/check_gptzero_func/mean": 0.4860951900482178, "rewards/check_gptzero_func/std": 0.21470338106155396, "sampling/importance_sampling_ratio/max": 1.7365124225616455, "sampling/importance_sampling_ratio/mean": 1.0001083612442017, "sampling/importance_sampling_ratio/min": 0.45248302817344666, "sampling/sampling_logp_difference/max": 0.7930049896240234, "sampling/sampling_logp_difference/mean": 0.02168208733201027, "step": 299 }, { "clip_ratio/high_max": 0.010641200467944145, "clip_ratio/high_mean": 0.007433556020259857, "clip_ratio/low_mean": 0.004597808700054884, "clip_ratio/low_min": 0.0038910505827516317, "clip_ratio/region_mean": 0.012031364254653454, "entropy": 0.5505670309066772, "epoch": 2.0547945205479454, "grad_norm": 1.0157160159955414, "kl": 0.1727115958929062, "learning_rate": 4.488013698630137e-07, "loss": 0.0224, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2836.0, "completions/max_terminated_length": 2836.0, "completions/mean_length": 522.982177734375, "completions/mean_terminated_length": 522.982177734375, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.4869726002216339, "epoch": 2.0616438356164384, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.1634233442774475, "kl": 0.18257594108581543, "learning_rate": 4.4863013698630134e-07, "loss": -0.0064, "num_tokens": 5375977.0, "reward": 0.4351056218147278, "reward_std": 0.10244044661521912, "rewards/check_gptzero_func/mean": 0.4351055920124054, "rewards/check_gptzero_func/std": 0.30534330010414124, "sampling/importance_sampling_ratio/max": 1.8732534646987915, "sampling/importance_sampling_ratio/mean": 1.0006484985351562, "sampling/importance_sampling_ratio/min": 0.301175594329834, "sampling/sampling_logp_difference/max": 1.2000617980957031, "sampling/sampling_logp_difference/mean": 0.019790615886449814, "step": 301 }, { "clip_ratio/high_max": 0.012481996789574623, "clip_ratio/high_mean": 0.008686253800988197, "clip_ratio/low_mean": 0.006899445317685604, "clip_ratio/low_min": 0.002538363914936781, "clip_ratio/region_mean": 0.015585699118673801, "entropy": 0.4847429692745209, "epoch": 2.0684931506849313, "grad_norm": 1.1651626489669427, "kl": 0.19437411427497864, "learning_rate": 4.4845890410958904e-07, "loss": -0.0116, "step": 302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2188.0, "completions/max_terminated_length": 2188.0, "completions/mean_length": 430.1964416503906, "completions/mean_terminated_length": 430.1964416503906, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.5035663843154907, "epoch": 2.0753424657534247, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4380078243436762, "kl": 0.20847947895526886, "learning_rate": 4.482876712328767e-07, "loss": -0.0002, "num_tokens": 5405044.0, "reward": 0.49560922384262085, "reward_std": 0.10123346745967865, "rewards/check_gptzero_func/mean": 0.49560922384262085, "rewards/check_gptzero_func/std": 0.256854772567749, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0001980066299438, "sampling/importance_sampling_ratio/min": 0.49781563878059387, "sampling/sampling_logp_difference/max": 0.8503701686859131, "sampling/sampling_logp_difference/mean": 0.020015304908156395, "step": 303 }, { "clip_ratio/high_max": 0.010917775332927704, "clip_ratio/high_mean": 0.009024837985634804, "clip_ratio/low_mean": 0.005507683847099543, "clip_ratio/low_min": 0.0034118047915399075, "clip_ratio/region_mean": 0.014532520435750484, "entropy": 0.5052453279495239, "epoch": 2.0821917808219177, "grad_norm": 1.2243507099396764, "kl": 0.1995290070772171, "learning_rate": 4.4811643835616435e-07, "loss": -0.0064, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 301.58929443359375, "completions/mean_terminated_length": 301.58929443359375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.48250773549079895, "epoch": 2.089041095890411, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.6899730171861576, "kl": 0.2902650833129883, "learning_rate": 4.4794520547945205e-07, "loss": 0.0221, "num_tokens": 5426893.0, "reward": 0.6250794529914856, "reward_std": 0.09574930369853973, "rewards/check_gptzero_func/mean": 0.6250794529914856, "rewards/check_gptzero_func/std": 0.33612221479415894, "sampling/importance_sampling_ratio/max": 1.9099640846252441, "sampling/importance_sampling_ratio/mean": 1.0009260177612305, "sampling/importance_sampling_ratio/min": 0.5489336252212524, "sampling/sampling_logp_difference/max": 0.6470844745635986, "sampling/sampling_logp_difference/mean": 0.019660361111164093, "step": 305 }, { "clip_ratio/high_max": 0.0080106807872653, "clip_ratio/high_mean": 0.006182576064020395, "clip_ratio/low_mean": 0.008344483561813831, "clip_ratio/low_min": 0.005072463769465685, "clip_ratio/region_mean": 0.014527060091495514, "entropy": 0.48059701919555664, "epoch": 2.095890410958904, "grad_norm": 1.4701281165568707, "kl": 0.30283379554748535, "learning_rate": 4.477739726027397e-07, "loss": 0.0155, "step": 306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1239.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 363.5535888671875, "completions/mean_terminated_length": 363.5535888671875, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "entropy": 0.4417833685874939, "epoch": 2.1027397260273974, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5039927517307254, "kl": 0.24800311028957367, "learning_rate": 4.4760273972602735e-07, "loss": -0.0035, "num_tokens": 5452038.0, "reward": 0.5157589316368103, "reward_std": 0.09111977368593216, "rewards/check_gptzero_func/mean": 0.5157589316368103, "rewards/check_gptzero_func/std": 0.3117639720439911, "sampling/importance_sampling_ratio/max": 1.6767667531967163, "sampling/importance_sampling_ratio/mean": 0.9993632435798645, "sampling/importance_sampling_ratio/min": 0.45974859595298767, "sampling/sampling_logp_difference/max": 0.7770754098892212, "sampling/sampling_logp_difference/mean": 0.018949832767248154, "step": 307 }, { "clip_ratio/high_max": 0.013996889814734459, "clip_ratio/high_mean": 0.011157328262925148, "clip_ratio/low_mean": 0.008818816393613815, "clip_ratio/low_min": 0.004900459200143814, "clip_ratio/region_mean": 0.019976148381829262, "entropy": 0.4443700909614563, "epoch": 2.1095890410958904, "grad_norm": 1.4711737704519592, "kl": 0.24044491350650787, "learning_rate": 4.47431506849315e-07, "loss": -0.0098, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1988.0, "completions/max_terminated_length": 1988.0, "completions/mean_length": 496.4107360839844, "completions/mean_terminated_length": 496.4107360839844, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.481070339679718, "epoch": 2.1164383561643834, "frac_reward_zero_std": 0.0, "grad_norm": 1.2123068728484434, "kl": 0.242346853017807, "learning_rate": 4.4726027397260276e-07, "loss": -0.0012, "num_tokens": 5484395.0, "reward": 0.46414050459861755, "reward_std": 0.14316627383232117, "rewards/check_gptzero_func/mean": 0.46414047479629517, "rewards/check_gptzero_func/std": 0.31964606046676636, "sampling/importance_sampling_ratio/max": 1.6143134832382202, "sampling/importance_sampling_ratio/mean": 1.000117301940918, "sampling/importance_sampling_ratio/min": 0.5676719546318054, "sampling/sampling_logp_difference/max": 0.5662115812301636, "sampling/sampling_logp_difference/mean": 0.01939268782734871, "step": 309 }, { "clip_ratio/high_max": 0.015593220479786396, "clip_ratio/high_mean": 0.008582805283367634, "clip_ratio/low_mean": 0.004942356608808041, "clip_ratio/low_min": 0.003881610929965973, "clip_ratio/region_mean": 0.013525160029530525, "entropy": 0.4822831153869629, "epoch": 2.1232876712328768, "grad_norm": 1.1132596646729607, "kl": 0.2369152158498764, "learning_rate": 4.470890410958904e-07, "loss": -0.0072, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2019.0, "completions/max_terminated_length": 2019.0, "completions/mean_length": 484.4464416503906, "completions/mean_terminated_length": 484.4464416503906, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.5473940372467041, "epoch": 2.1301369863013697, "frac_reward_zero_std": 0.0, "grad_norm": 1.2963797366759875, "kl": 0.16996677219867706, "learning_rate": 4.4691780821917806e-07, "loss": -0.0033, "num_tokens": 5516840.0, "reward": 0.5765905380249023, "reward_std": 0.08644893020391464, "rewards/check_gptzero_func/mean": 0.5765905380249023, "rewards/check_gptzero_func/std": 0.2729920446872711, "sampling/importance_sampling_ratio/max": 1.597672462463379, "sampling/importance_sampling_ratio/mean": 0.9996706247329712, "sampling/importance_sampling_ratio/min": 0.446305513381958, "sampling/sampling_logp_difference/max": 0.8067516088485718, "sampling/sampling_logp_difference/mean": 0.02033572643995285, "step": 311 }, { "clip_ratio/high_max": 0.0120060034096241, "clip_ratio/high_mean": 0.008446287363767624, "clip_ratio/low_mean": 0.005645204335451126, "clip_ratio/low_min": 0.002945508109405637, "clip_ratio/region_mean": 0.01409149169921875, "entropy": 0.5488283038139343, "epoch": 2.136986301369863, "grad_norm": 1.2003799951965057, "kl": 0.17118623852729797, "learning_rate": 4.4674657534246576e-07, "loss": -0.0091, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1509.0, "completions/max_terminated_length": 1509.0, "completions/mean_length": 467.5714416503906, "completions/mean_terminated_length": 467.5714416503906, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.5533055067062378, "epoch": 2.143835616438356, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.3552550946448236, "kl": 0.19718776643276215, "learning_rate": 4.465753424657534e-07, "loss": -0.0111, "num_tokens": 5548136.0, "reward": 0.5722416639328003, "reward_std": 0.06716534495353699, "rewards/check_gptzero_func/mean": 0.5722416639328003, "rewards/check_gptzero_func/std": 0.2759210765361786, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0002747774124146, "sampling/importance_sampling_ratio/min": 0.4741215109825134, "sampling/sampling_logp_difference/max": 0.9351195096969604, "sampling/sampling_logp_difference/mean": 0.02099498175084591, "step": 313 }, { "clip_ratio/high_max": 0.010422698222100735, "clip_ratio/high_mean": 0.007626709062606096, "clip_ratio/low_mean": 0.006734912283718586, "clip_ratio/low_min": 0.0036109229549765587, "clip_ratio/region_mean": 0.01436162181198597, "entropy": 0.5527940392494202, "epoch": 2.1506849315068495, "grad_norm": 1.282432608320012, "kl": 0.20849230885505676, "learning_rate": 4.4640410958904106e-07, "loss": -0.0173, "step": 314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1648.0, "completions/max_terminated_length": 1648.0, "completions/mean_length": 484.5357360839844, "completions/mean_terminated_length": 484.5357360839844, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.5199235081672668, "epoch": 2.1575342465753424, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.2929562581774894, "kl": 0.18263091146945953, "learning_rate": 4.462328767123287e-07, "loss": -0.0125, "num_tokens": 5580036.0, "reward": 0.5088039636611938, "reward_std": 0.0833464190363884, "rewards/check_gptzero_func/mean": 0.5088039040565491, "rewards/check_gptzero_func/std": 0.2931346297264099, "sampling/importance_sampling_ratio/max": 1.6436339616775513, "sampling/importance_sampling_ratio/mean": 1.0002484321594238, "sampling/importance_sampling_ratio/min": 0.31381329894065857, "sampling/sampling_logp_difference/max": 1.1589571237564087, "sampling/sampling_logp_difference/mean": 0.019967317581176758, "step": 315 }, { "clip_ratio/high_max": 0.012979988940060139, "clip_ratio/high_mean": 0.008771310560405254, "clip_ratio/low_mean": 0.005298694130033255, "clip_ratio/low_min": 0.004326662980020046, "clip_ratio/region_mean": 0.014070005156099796, "entropy": 0.5199235081672668, "epoch": 2.1643835616438354, "grad_norm": 201.14772103444125, "kl": 4.112004280090332, "learning_rate": 4.460616438356164e-07, "loss": 0.0023, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1599.0, "completions/max_terminated_length": 1599.0, "completions/mean_length": 528.2678833007812, "completions/mean_terminated_length": 528.2678833007812, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "entropy": 0.5799452662467957, "epoch": 2.171232876712329, "frac_reward_zero_std": 0.0, "grad_norm": 1.2503886912566706, "kl": 0.1943761557340622, "learning_rate": 4.4589041095890407e-07, "loss": -0.0204, "num_tokens": 5614635.0, "reward": 0.5073720216751099, "reward_std": 0.07880806922912598, "rewards/check_gptzero_func/mean": 0.5073719620704651, "rewards/check_gptzero_func/std": 0.2797766327857971, "sampling/importance_sampling_ratio/max": 1.8336056470870972, "sampling/importance_sampling_ratio/mean": 0.9998432993888855, "sampling/importance_sampling_ratio/min": 0.0008615035330876708, "sampling/sampling_logp_difference/max": 7.056831359863281, "sampling/sampling_logp_difference/mean": 0.02174035646021366, "step": 317 }, { "clip_ratio/high_max": 0.0101551478728652, "clip_ratio/high_mean": 0.007659185212105513, "clip_ratio/low_mean": 0.004478625487536192, "clip_ratio/low_min": 0.0029797377064824104, "clip_ratio/region_mean": 0.012137810699641705, "entropy": 0.5799026489257812, "epoch": 2.1780821917808217, "grad_norm": 1.438773606737178, "kl": 0.19168250262737274, "learning_rate": 4.4571917808219177e-07, "loss": -0.0263, "step": 318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 460.8214416503906, "completions/mean_terminated_length": 460.8214416503906, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "entropy": 0.5221045017242432, "epoch": 2.184931506849315, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3750765435255947, "kl": 0.2366912066936493, "learning_rate": 4.4554794520547947e-07, "loss": 0.0126, "num_tokens": 5645603.0, "reward": 0.4981337785720825, "reward_std": 0.10319367051124573, "rewards/check_gptzero_func/mean": 0.49813371896743774, "rewards/check_gptzero_func/std": 0.24339556694030762, "sampling/importance_sampling_ratio/max": 1.620772361755371, "sampling/importance_sampling_ratio/mean": 0.9999723434448242, "sampling/importance_sampling_ratio/min": 0.2696665823459625, "sampling/sampling_logp_difference/max": 1.310568928718567, "sampling/sampling_logp_difference/mean": 0.020263923332095146, "step": 319 }, { "clip_ratio/high_max": 0.012907852418720722, "clip_ratio/high_mean": 0.009253566153347492, "clip_ratio/low_mean": 0.006024139933288097, "clip_ratio/low_min": 0.0038284838665276766, "clip_ratio/region_mean": 0.01527770608663559, "entropy": 0.525387704372406, "epoch": 2.191780821917808, "grad_norm": 1.2775684302887682, "kl": 0.222274512052536, "learning_rate": 4.453767123287671e-07, "loss": 0.0066, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1697.0, "completions/max_terminated_length": 1697.0, "completions/mean_length": 476.89288330078125, "completions/mean_terminated_length": 476.89288330078125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.5432084798812866, "epoch": 2.1986301369863015, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3001335705162125, "kl": 0.1978531777858734, "learning_rate": 4.4520547945205477e-07, "loss": 0.0105, "num_tokens": 5677103.0, "reward": 0.5298625826835632, "reward_std": 0.09097912907600403, "rewards/check_gptzero_func/mean": 0.5298625826835632, "rewards/check_gptzero_func/std": 0.2747863233089447, "sampling/importance_sampling_ratio/max": 1.6429671049118042, "sampling/importance_sampling_ratio/mean": 0.9993921518325806, "sampling/importance_sampling_ratio/min": 0.42507243156433105, "sampling/sampling_logp_difference/max": 0.8554956912994385, "sampling/sampling_logp_difference/mean": 0.020479729399085045, "step": 321 }, { "clip_ratio/high_max": 0.009937888011336327, "clip_ratio/high_mean": 0.00875752605497837, "clip_ratio/low_mean": 0.005161639302968979, "clip_ratio/low_min": 0.00309803057461977, "clip_ratio/region_mean": 0.01391916535794735, "entropy": 0.5436651110649109, "epoch": 2.2054794520547945, "grad_norm": 1.2485927321002492, "kl": 0.19347521662712097, "learning_rate": 4.450342465753424e-07, "loss": 0.0045, "step": 322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1975.0, "completions/max_terminated_length": 1975.0, "completions/mean_length": 471.5357360839844, "completions/mean_terminated_length": 471.5357360839844, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5325497388839722, "epoch": 2.212328767123288, "frac_reward_zero_std": 0.0, "grad_norm": 1.2692021744970061, "kl": 0.1778530776500702, "learning_rate": 4.4486301369863013e-07, "loss": -0.0133, "num_tokens": 5707757.0, "reward": 0.5381926894187927, "reward_std": 0.05944599583745003, "rewards/check_gptzero_func/mean": 0.5381926894187927, "rewards/check_gptzero_func/std": 0.29294440150260925, "sampling/importance_sampling_ratio/max": 1.7267394065856934, "sampling/importance_sampling_ratio/mean": 1.0004633665084839, "sampling/importance_sampling_ratio/min": 0.5161957740783691, "sampling/sampling_logp_difference/max": 0.6612691879272461, "sampling/sampling_logp_difference/mean": 0.02047685533761978, "step": 323 }, { "clip_ratio/high_max": 0.011242271400988102, "clip_ratio/high_mean": 0.008316573686897755, "clip_ratio/low_mean": 0.005924056749790907, "clip_ratio/low_min": 0.0027351146563887596, "clip_ratio/region_mean": 0.014240629971027374, "entropy": 0.5351308584213257, "epoch": 2.219178082191781, "grad_norm": 1.1981135263207803, "kl": 0.17015691101551056, "learning_rate": 4.446917808219178e-07, "loss": -0.0193, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 547.1785888671875, "completions/mean_terminated_length": 547.1785888671875, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.5683521032333374, "epoch": 2.2260273972602738, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 3.387821801331936, "kl": 0.5976700782775879, "learning_rate": 4.4452054794520543e-07, "loss": 0.0013, "num_tokens": 5742871.0, "reward": 0.46501031517982483, "reward_std": 0.08236892521381378, "rewards/check_gptzero_func/mean": 0.46501031517982483, "rewards/check_gptzero_func/std": 0.27670907974243164, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0000934600830078, "sampling/importance_sampling_ratio/min": 0.5038750767707825, "sampling/sampling_logp_difference/max": 0.7055859565734863, "sampling/sampling_logp_difference/mean": 0.021777693182229996, "step": 325 }, { "clip_ratio/high_max": 0.016836509108543396, "clip_ratio/high_mean": 0.008763973601162434, "clip_ratio/low_mean": 0.006176672875881195, "clip_ratio/low_min": 0.001914791762828827, "clip_ratio/region_mean": 0.014940647408366203, "entropy": 0.5747013092041016, "epoch": 2.232876712328767, "grad_norm": 15.035139441685082, "kl": 0.19010598957538605, "learning_rate": 4.443493150684932e-07, "loss": 0.0056, "step": 326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1160.0, "completions/max_terminated_length": 1160.0, "completions/mean_length": 439.4464416503906, "completions/mean_terminated_length": 439.4464416503906, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.5099381804466248, "epoch": 2.23972602739726, "frac_reward_zero_std": 0.0, "grad_norm": 1.4546006268787952, "kl": 0.282457560300827, "learning_rate": 4.4417808219178083e-07, "loss": -0.0058, "num_tokens": 5772510.0, "reward": 0.49658238887786865, "reward_std": 0.09668020904064178, "rewards/check_gptzero_func/mean": 0.49658235907554626, "rewards/check_gptzero_func/std": 0.2963210344314575, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9995713829994202, "sampling/importance_sampling_ratio/min": 0.3384910225868225, "sampling/sampling_logp_difference/max": 1.0832576751708984, "sampling/sampling_logp_difference/mean": 0.019997326657176018, "step": 327 }, { "clip_ratio/high_max": 0.01071784645318985, "clip_ratio/high_mean": 0.00811596866697073, "clip_ratio/low_mean": 0.0071211871691048145, "clip_ratio/low_min": 0.004984050989151001, "clip_ratio/region_mean": 0.015237157233059406, "entropy": 0.5104427933692932, "epoch": 2.2465753424657535, "grad_norm": 1.2591579949455582, "kl": 0.27270784974098206, "learning_rate": 4.440068493150685e-07, "loss": -0.0116, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 981.0, "completions/max_terminated_length": 981.0, "completions/mean_length": 344.1071472167969, "completions/mean_terminated_length": 344.1071472167969, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.5007592439651489, "epoch": 2.2534246575342465, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.6352453073389321, "kl": 0.27119380235671997, "learning_rate": 4.4383561643835613e-07, "loss": -0.0279, "num_tokens": 5796596.0, "reward": 0.5591997504234314, "reward_std": 0.07928477972745895, "rewards/check_gptzero_func/mean": 0.5591996908187866, "rewards/check_gptzero_func/std": 0.31459519267082214, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.00017249584198, "sampling/importance_sampling_ratio/min": 0.39690446853637695, "sampling/sampling_logp_difference/max": 1.062560796737671, "sampling/sampling_logp_difference/mean": 0.02067774161696434, "step": 329 }, { "clip_ratio/high_max": 0.01694362983107567, "clip_ratio/high_mean": 0.011728775687515736, "clip_ratio/low_mean": 0.007998752407729626, "clip_ratio/low_min": 0.004938271827995777, "clip_ratio/region_mean": 0.01972752995789051, "entropy": 0.5041824579238892, "epoch": 2.26027397260274, "grad_norm": 1.6041566161019194, "kl": 0.2852722704410553, "learning_rate": 4.4366438356164384e-07, "loss": -0.0345, "step": 330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 472.9464416503906, "completions/mean_terminated_length": 472.9464416503906, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.524861752986908, "epoch": 2.267123287671233, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.385186940719672, "kl": 0.2774750888347626, "learning_rate": 4.434931506849315e-07, "loss": 0.0105, "num_tokens": 5827533.0, "reward": 0.510645866394043, "reward_std": 0.07086554169654846, "rewards/check_gptzero_func/mean": 0.5106458067893982, "rewards/check_gptzero_func/std": 0.22217275202274323, "sampling/importance_sampling_ratio/max": 1.6565481424331665, "sampling/importance_sampling_ratio/mean": 1.0000337362289429, "sampling/importance_sampling_ratio/min": 0.14644774794578552, "sampling/sampling_logp_difference/max": 1.9210865497589111, "sampling/sampling_logp_difference/mean": 0.021223943680524826, "step": 331 }, { "clip_ratio/high_max": 0.008688096888363361, "clip_ratio/high_mean": 0.007030278444290161, "clip_ratio/low_mean": 0.00437760166823864, "clip_ratio/low_min": 0.001805054140277207, "clip_ratio/region_mean": 0.011407880112528801, "entropy": 0.527859091758728, "epoch": 2.2739726027397262, "grad_norm": 1.1563407071363971, "kl": 0.23142682015895844, "learning_rate": 4.4332191780821914e-07, "loss": 0.0042, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2118.0, "completions/max_terminated_length": 2118.0, "completions/mean_length": 549.8214721679688, "completions/mean_terminated_length": 549.8214721679688, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.5826317667961121, "epoch": 2.280821917808219, "frac_reward_zero_std": 0.0, "grad_norm": 1.174186659191804, "kl": 0.18033206462860107, "learning_rate": 4.431506849315068e-07, "loss": -0.0288, "num_tokens": 5863887.0, "reward": 0.48685768246650696, "reward_std": 0.07865818589925766, "rewards/check_gptzero_func/mean": 0.48685765266418457, "rewards/check_gptzero_func/std": 0.2861599028110504, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0004788637161255, "sampling/importance_sampling_ratio/min": 0.4959690570831299, "sampling/sampling_logp_difference/max": 0.8144280910491943, "sampling/sampling_logp_difference/mean": 0.021289529278874397, "step": 333 }, { "clip_ratio/high_max": 0.008760951459407806, "clip_ratio/high_mean": 0.006930656265467405, "clip_ratio/low_mean": 0.004886273760348558, "clip_ratio/low_min": 0.004431314766407013, "clip_ratio/region_mean": 0.011816930957138538, "entropy": 0.5822619795799255, "epoch": 2.287671232876712, "grad_norm": 1.04401216192574, "kl": 0.18246306478977203, "learning_rate": 4.429794520547945e-07, "loss": -0.0345, "step": 334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 502.33929443359375, "completions/mean_terminated_length": 502.33929443359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5961974263191223, "epoch": 2.2945205479452055, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.2261688452787516, "kl": 0.1952582448720932, "learning_rate": 4.428082191780822e-07, "loss": 0.004, "num_tokens": 5896470.0, "reward": 0.5950523614883423, "reward_std": 0.08764978498220444, "rewards/check_gptzero_func/mean": 0.5950523018836975, "rewards/check_gptzero_func/std": 0.2470317929983139, "sampling/importance_sampling_ratio/max": 1.621711254119873, "sampling/importance_sampling_ratio/mean": 1.0003172159194946, "sampling/importance_sampling_ratio/min": 0.6558547019958496, "sampling/sampling_logp_difference/max": 0.48348188400268555, "sampling/sampling_logp_difference/mean": 0.02127649635076523, "step": 335 }, { "clip_ratio/high_max": 0.009599470533430576, "clip_ratio/high_mean": 0.006793014705181122, "clip_ratio/low_mean": 0.004375914577394724, "clip_ratio/low_min": 0.0017094017239287496, "clip_ratio/region_mean": 0.011168928816914558, "entropy": 0.5971493124961853, "epoch": 2.3013698630136985, "grad_norm": 1.0940247791254902, "kl": 0.19421903789043427, "learning_rate": 4.4263698630136985e-07, "loss": -0.0018, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 546.4464721679688, "completions/mean_terminated_length": 546.4464721679688, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "entropy": 0.6059335470199585, "epoch": 2.308219178082192, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.2093090388034018, "kl": 0.19652105867862701, "learning_rate": 4.4246575342465755e-07, "loss": -0.0092, "num_tokens": 5932269.0, "reward": 0.5261946320533752, "reward_std": 0.09106195718050003, "rewards/check_gptzero_func/mean": 0.5261946320533752, "rewards/check_gptzero_func/std": 0.2831484377384186, "sampling/importance_sampling_ratio/max": 1.8718546628952026, "sampling/importance_sampling_ratio/mean": 0.9999179840087891, "sampling/importance_sampling_ratio/min": 0.48236799240112305, "sampling/sampling_logp_difference/max": 0.7290480136871338, "sampling/sampling_logp_difference/mean": 0.021724309772253036, "step": 337 }, { "clip_ratio/high_max": 0.007755187805742025, "clip_ratio/high_mean": 0.006332312244921923, "clip_ratio/low_mean": 0.004775951616466045, "clip_ratio/low_min": 0.002471289364621043, "clip_ratio/region_mean": 0.01110826339572668, "entropy": 0.6062563061714172, "epoch": 2.315068493150685, "grad_norm": 1.0737742042046157, "kl": 0.18711026012897491, "learning_rate": 4.422945205479452e-07, "loss": -0.015, "step": 338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 422.0714416503906, "completions/mean_terminated_length": 422.0714416503906, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5208082795143127, "epoch": 2.3219178082191783, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3554178568734532, "kl": 0.2529573142528534, "learning_rate": 4.4212328767123285e-07, "loss": -0.0108, "num_tokens": 5960621.0, "reward": 0.5239826440811157, "reward_std": 0.056644998490810394, "rewards/check_gptzero_func/mean": 0.523982584476471, "rewards/check_gptzero_func/std": 0.2827340066432953, "sampling/importance_sampling_ratio/max": 1.6237726211547852, "sampling/importance_sampling_ratio/mean": 0.9995747804641724, "sampling/importance_sampling_ratio/min": 0.4084649980068207, "sampling/sampling_logp_difference/max": 0.8953490257263184, "sampling/sampling_logp_difference/mean": 0.020918196067214012, "step": 339 }, { "clip_ratio/high_max": 0.012015292420983315, "clip_ratio/high_mean": 0.009131716564297676, "clip_ratio/low_mean": 0.005606485065072775, "clip_ratio/low_min": 0.0032768978271633387, "clip_ratio/region_mean": 0.014738202095031738, "entropy": 0.5215625166893005, "epoch": 2.328767123287671, "grad_norm": 10.999093054008128, "kl": 0.4277959167957306, "learning_rate": 4.419520547945205e-07, "loss": -0.0157, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 498.39288330078125, "completions/mean_terminated_length": 498.39288330078125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.48408079147338867, "epoch": 2.3356164383561646, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2149410103149525, "kl": 0.18414750695228577, "learning_rate": 4.417808219178082e-07, "loss": 0.013, "num_tokens": 5993531.0, "reward": 0.6020358800888062, "reward_std": 0.07768404483795166, "rewards/check_gptzero_func/mean": 0.6020358800888062, "rewards/check_gptzero_func/std": 0.27497702836990356, "sampling/importance_sampling_ratio/max": 1.8089981079101562, "sampling/importance_sampling_ratio/mean": 1.00001060962677, "sampling/importance_sampling_ratio/min": 0.353055477142334, "sampling/sampling_logp_difference/max": 1.0411300659179688, "sampling/sampling_logp_difference/mean": 0.018457500264048576, "step": 341 }, { "clip_ratio/high_max": 0.00884554348886013, "clip_ratio/high_mean": 0.007197525817900896, "clip_ratio/low_mean": 0.005048460327088833, "clip_ratio/low_min": 0.002610966097563505, "clip_ratio/region_mean": 0.012245984748005867, "entropy": 0.48390594124794006, "epoch": 2.3424657534246576, "grad_norm": 1.2024866266145093, "kl": 0.18784502148628235, "learning_rate": 4.4160958904109585e-07, "loss": 0.0071, "step": 342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 370.5535888671875, "completions/mean_terminated_length": 370.5535888671875, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "entropy": 0.48136597871780396, "epoch": 2.3493150684931505, "frac_reward_zero_std": 0.0, "grad_norm": 1.4758852545703913, "kl": 0.2730221450328827, "learning_rate": 4.414383561643835e-07, "loss": -0.0155, "num_tokens": 6019734.0, "reward": 0.5448682308197021, "reward_std": 0.0982283428311348, "rewards/check_gptzero_func/mean": 0.5448681712150574, "rewards/check_gptzero_func/std": 0.2837560176849365, "sampling/importance_sampling_ratio/max": 1.928285002708435, "sampling/importance_sampling_ratio/mean": 1.0001182556152344, "sampling/importance_sampling_ratio/min": 0.31527179479599, "sampling/sampling_logp_difference/max": 1.154320240020752, "sampling/sampling_logp_difference/mean": 0.019632799550890923, "step": 343 }, { "clip_ratio/high_max": 0.012820512987673283, "clip_ratio/high_mean": 0.009711181744933128, "clip_ratio/low_mean": 0.006801821291446686, "clip_ratio/low_min": 0.0045045046135783195, "clip_ratio/region_mean": 0.016513003036379814, "entropy": 0.48113760352134705, "epoch": 2.356164383561644, "grad_norm": 1.4440352214414858, "kl": 0.275266170501709, "learning_rate": 4.4126712328767126e-07, "loss": -0.0226, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1048.0, "completions/max_terminated_length": 1048.0, "completions/mean_length": 447.1785888671875, "completions/mean_terminated_length": 447.1785888671875, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.5238921046257019, "epoch": 2.363013698630137, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.4688258920568844, "kl": 0.26291829347610474, "learning_rate": 4.410958904109589e-07, "loss": -0.0162, "num_tokens": 6050144.0, "reward": 0.5150052309036255, "reward_std": 0.06541639566421509, "rewards/check_gptzero_func/mean": 0.5150051712989807, "rewards/check_gptzero_func/std": 0.23789198696613312, "sampling/importance_sampling_ratio/max": 1.6141045093536377, "sampling/importance_sampling_ratio/mean": 0.9998749494552612, "sampling/importance_sampling_ratio/min": 0.5676555037498474, "sampling/sampling_logp_difference/max": 0.5662405490875244, "sampling/sampling_logp_difference/mean": 0.021707328036427498, "step": 345 }, { "clip_ratio/high_max": 0.023043304681777954, "clip_ratio/high_mean": 0.014665218070149422, "clip_ratio/low_mean": 0.008524669334292412, "clip_ratio/low_min": 0.005283293779939413, "clip_ratio/region_mean": 0.023189887404441833, "entropy": 0.528393566608429, "epoch": 2.3698630136986303, "grad_norm": 1.3905988683259771, "kl": 0.23564723134040833, "learning_rate": 4.4092465753424656e-07, "loss": -0.0223, "step": 346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 454.3214416503906, "completions/mean_terminated_length": 454.3214416503906, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.541931688785553, "epoch": 2.3767123287671232, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.2944724895999313, "kl": 0.2431774139404297, "learning_rate": 4.407534246575342e-07, "loss": 0.0086, "num_tokens": 6080448.0, "reward": 0.526686429977417, "reward_std": 0.08098059147596359, "rewards/check_gptzero_func/mean": 0.5266863703727722, "rewards/check_gptzero_func/std": 0.2535557746887207, "sampling/importance_sampling_ratio/max": 1.6157478094100952, "sampling/importance_sampling_ratio/mean": 0.9995589852333069, "sampling/importance_sampling_ratio/min": 0.4161674380302429, "sampling/sampling_logp_difference/max": 0.8766676187515259, "sampling/sampling_logp_difference/mean": 0.020741600543260574, "step": 347 }, { "clip_ratio/high_max": 0.01104972418397665, "clip_ratio/high_mean": 0.00883796438574791, "clip_ratio/low_mean": 0.005599420052021742, "clip_ratio/low_min": 0.0036515388637781143, "clip_ratio/region_mean": 0.014437383972108364, "entropy": 0.5431872606277466, "epoch": 2.383561643835616, "grad_norm": 1.2244649655046511, "kl": 0.23803982138633728, "learning_rate": 4.405821917808219e-07, "loss": 0.0023, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 471.4464416503906, "completions/mean_terminated_length": 471.4464416503906, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.5196276307106018, "epoch": 2.3904109589041096, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.3963690177067751, "kl": 0.2425893098115921, "learning_rate": 4.4041095890410957e-07, "loss": 0.0029, "num_tokens": 6111791.0, "reward": 0.5268763899803162, "reward_std": 0.07649117708206177, "rewards/check_gptzero_func/mean": 0.5268763899803162, "rewards/check_gptzero_func/std": 0.26525890827178955, "sampling/importance_sampling_ratio/max": 1.5514990091323853, "sampling/importance_sampling_ratio/mean": 1.0001332759857178, "sampling/importance_sampling_ratio/min": 0.6229923367500305, "sampling/sampling_logp_difference/max": 0.4732210636138916, "sampling/sampling_logp_difference/mean": 0.020380228757858276, "step": 349 }, { "clip_ratio/high_max": 0.011003405787050724, "clip_ratio/high_mean": 0.008575408719480038, "clip_ratio/low_mean": 0.004685975611209869, "clip_ratio/low_min": 0.0020869565196335316, "clip_ratio/region_mean": 0.013261384330689907, "entropy": 0.5192122459411621, "epoch": 2.3972602739726026, "grad_norm": 1.1386795455860046, "kl": 0.24036870896816254, "learning_rate": 4.402397260273972e-07, "loss": -0.0032, "step": 350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 380.6250305175781, "completions/mean_terminated_length": 380.6250305175781, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.4517695903778076, "epoch": 2.404109589041096, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4906422652068443, "kl": 0.29860904812812805, "learning_rate": 4.400684931506849e-07, "loss": -0.0167, "num_tokens": 6137996.0, "reward": 0.5748412609100342, "reward_std": 0.09456776082515717, "rewards/check_gptzero_func/mean": 0.5748412013053894, "rewards/check_gptzero_func/std": 0.2727135717868805, "sampling/importance_sampling_ratio/max": 1.7642818689346313, "sampling/importance_sampling_ratio/mean": 1.0007543563842773, "sampling/importance_sampling_ratio/min": 0.324964702129364, "sampling/sampling_logp_difference/max": 1.1240386962890625, "sampling/sampling_logp_difference/mean": 0.01842278055846691, "step": 351 }, { "clip_ratio/high_max": 0.011895321309566498, "clip_ratio/high_mean": 0.008811316452920437, "clip_ratio/low_mean": 0.005877100862562656, "clip_ratio/low_min": 0.0021960472222417593, "clip_ratio/region_mean": 0.014688417315483093, "entropy": 0.45264795422554016, "epoch": 2.410958904109589, "grad_norm": 10.188444340409431, "kl": 0.6589894890785217, "learning_rate": 4.3989726027397257e-07, "loss": -0.0224, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1508.0, "completions/max_terminated_length": 1508.0, "completions/mean_length": 324.3571472167969, "completions/mean_terminated_length": 324.3571472167969, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.42769941687583923, "epoch": 2.4178082191780823, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.53416896602162, "kl": 0.33733072876930237, "learning_rate": 4.397260273972603e-07, "loss": 0.0119, "num_tokens": 6161076.0, "reward": 0.5977830290794373, "reward_std": 0.05232352390885353, "rewards/check_gptzero_func/mean": 0.5977830290794373, "rewards/check_gptzero_func/std": 0.27511847019195557, "sampling/importance_sampling_ratio/max": 1.8179774284362793, "sampling/importance_sampling_ratio/mean": 1.0000580549240112, "sampling/importance_sampling_ratio/min": 0.5146942138671875, "sampling/sampling_logp_difference/max": 0.6641823053359985, "sampling/sampling_logp_difference/mean": 0.018229885026812553, "step": 353 }, { "clip_ratio/high_max": 0.012512682005763054, "clip_ratio/high_mean": 0.009866737760603428, "clip_ratio/low_mean": 0.006778375245630741, "clip_ratio/low_min": 0.0034317090176045895, "clip_ratio/region_mean": 0.016645114868879318, "entropy": 0.42860183119773865, "epoch": 2.4246575342465753, "grad_norm": 1.3402989479626997, "kl": 0.3160437047481537, "learning_rate": 4.395547945205479e-07, "loss": 0.0051, "step": 354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1665.0, "completions/max_terminated_length": 1665.0, "completions/mean_length": 410.8750305175781, "completions/mean_terminated_length": 410.8750305175781, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.4875812232494354, "epoch": 2.4315068493150687, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4324903966217586, "kl": 0.3544375002384186, "learning_rate": 4.3938356164383563e-07, "loss": -0.0276, "num_tokens": 6188879.0, "reward": 0.5211838483810425, "reward_std": 0.07214029133319855, "rewards/check_gptzero_func/mean": 0.5211838483810425, "rewards/check_gptzero_func/std": 0.268445760011673, "sampling/importance_sampling_ratio/max": 1.5803440809249878, "sampling/importance_sampling_ratio/mean": 0.9999149441719055, "sampling/importance_sampling_ratio/min": 0.2991293966770172, "sampling/sampling_logp_difference/max": 1.2068790197372437, "sampling/sampling_logp_difference/mean": 0.020419400185346603, "step": 355 }, { "clip_ratio/high_max": 0.014322916977107525, "clip_ratio/high_mean": 0.009915272705256939, "clip_ratio/low_mean": 0.006155786570161581, "clip_ratio/low_min": 0.004563233349472284, "clip_ratio/region_mean": 0.016071060672402382, "entropy": 0.48954081535339355, "epoch": 2.4383561643835616, "grad_norm": 1.343758258655348, "kl": 0.33561521768569946, "learning_rate": 4.392123287671233e-07, "loss": -0.0345, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 422.3571472167969, "completions/mean_terminated_length": 422.3571472167969, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.5816812515258789, "epoch": 2.4452054794520546, "frac_reward_zero_std": 0.0, "grad_norm": 1.385774473596232, "kl": 0.26461127400398254, "learning_rate": 4.3904109589041093e-07, "loss": 0.0025, "num_tokens": 6217039.0, "reward": 0.45055684447288513, "reward_std": 0.115142822265625, "rewards/check_gptzero_func/mean": 0.45055681467056274, "rewards/check_gptzero_func/std": 0.27769240736961365, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9998016953468323, "sampling/importance_sampling_ratio/min": 0.40766820311546326, "sampling/sampling_logp_difference/max": 0.9749031066894531, "sampling/sampling_logp_difference/mean": 0.02192218042910099, "step": 357 }, { "clip_ratio/high_max": 0.013755598105490208, "clip_ratio/high_mean": 0.008956960402429104, "clip_ratio/low_mean": 0.006312993820756674, "clip_ratio/low_min": 0.0030546009074896574, "clip_ratio/region_mean": 0.015269954688847065, "entropy": 0.5829386115074158, "epoch": 2.452054794520548, "grad_norm": 1.20250708366444, "kl": 0.26861128211021423, "learning_rate": 4.388698630136986e-07, "loss": -0.0042, "step": 358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1295.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 378.51788330078125, "completions/mean_terminated_length": 378.51788330078125, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "entropy": 0.43776753544807434, "epoch": 2.458904109589041, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 3.182454136676163, "kl": 0.6513305306434631, "learning_rate": 4.386986301369863e-07, "loss": 0.0136, "num_tokens": 6244318.0, "reward": 0.5972626805305481, "reward_std": 0.060601621866226196, "rewards/check_gptzero_func/mean": 0.5972626805305481, "rewards/check_gptzero_func/std": 0.2945336699485779, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9999793171882629, "sampling/importance_sampling_ratio/min": 0.6096009016036987, "sampling/sampling_logp_difference/max": 0.7044198513031006, "sampling/sampling_logp_difference/mean": 0.018176868557929993, "step": 359 }, { "clip_ratio/high_max": 0.008199521340429783, "clip_ratio/high_mean": 0.005862611345946789, "clip_ratio/low_mean": 0.003998142201453447, "clip_ratio/low_min": 0.0026486876886337996, "clip_ratio/region_mean": 0.009860754013061523, "entropy": 0.43815335631370544, "epoch": 2.4657534246575343, "grad_norm": 1.346884182629006, "kl": 0.3273450434207916, "learning_rate": 4.3852739726027393e-07, "loss": 0.008, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1658.0, "completions/max_terminated_length": 1658.0, "completions/mean_length": 511.96429443359375, "completions/mean_terminated_length": 511.96429443359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5242536664009094, "epoch": 2.4726027397260273, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.2018900774217327, "kl": 0.23796595633029938, "learning_rate": 4.383561643835616e-07, "loss": -0.0173, "num_tokens": 6278174.0, "reward": 0.41934940218925476, "reward_std": 0.11097440123558044, "rewards/check_gptzero_func/mean": 0.41934940218925476, "rewards/check_gptzero_func/std": 0.2548699378967285, "sampling/importance_sampling_ratio/max": 1.7870146036148071, "sampling/importance_sampling_ratio/mean": 1.0000799894332886, "sampling/importance_sampling_ratio/min": 0.49544984102249146, "sampling/sampling_logp_difference/max": 0.7022891044616699, "sampling/sampling_logp_difference/mean": 0.020594948902726173, "step": 361 }, { "clip_ratio/high_max": 0.01089137326925993, "clip_ratio/high_mean": 0.008858802728354931, "clip_ratio/low_mean": 0.005923810880631208, "clip_ratio/low_min": 0.0030562346801161766, "clip_ratio/region_mean": 0.014782614074647427, "entropy": 0.525816023349762, "epoch": 2.4794520547945207, "grad_norm": 1.0592155365471423, "kl": 0.23505043983459473, "learning_rate": 4.3818493150684934e-07, "loss": -0.0233, "step": 362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 444.58929443359375, "completions/mean_terminated_length": 444.58929443359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5393292307853699, "epoch": 2.4863013698630136, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4287295205845958, "kl": 0.3168797791004181, "learning_rate": 4.38013698630137e-07, "loss": -0.0147, "num_tokens": 6307819.0, "reward": 0.4957086145877838, "reward_std": 0.11381091922521591, "rewards/check_gptzero_func/mean": 0.4957086145877838, "rewards/check_gptzero_func/std": 0.28774523735046387, "sampling/importance_sampling_ratio/max": 1.9781683683395386, "sampling/importance_sampling_ratio/mean": 1.0000237226486206, "sampling/importance_sampling_ratio/min": 0.37653112411499023, "sampling/sampling_logp_difference/max": 0.9767546653747559, "sampling/sampling_logp_difference/mean": 0.02109498716890812, "step": 363 }, { "clip_ratio/high_max": 0.012304622679948807, "clip_ratio/high_mean": 0.009451137855648994, "clip_ratio/low_mean": 0.005537826102226973, "clip_ratio/low_min": 0.003333986969664693, "clip_ratio/region_mean": 0.014988965354859829, "entropy": 0.5409734845161438, "epoch": 2.493150684931507, "grad_norm": 1.2337690128286982, "kl": 0.3074970841407776, "learning_rate": 4.3784246575342464e-07, "loss": -0.0212, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 426.3571472167969, "completions/mean_terminated_length": 426.3571472167969, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.47863492369651794, "epoch": 2.5, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3628334973005491, "kl": 0.32587602734565735, "learning_rate": 4.376712328767123e-07, "loss": 0.0162, "num_tokens": 6337145.0, "reward": 0.5061798095703125, "reward_std": 0.07924666255712509, "rewards/check_gptzero_func/mean": 0.5061798095703125, "rewards/check_gptzero_func/std": 0.2720505893230438, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996888041496277, "sampling/importance_sampling_ratio/min": 0.5261460542678833, "sampling/sampling_logp_difference/max": 0.7213890552520752, "sampling/sampling_logp_difference/mean": 0.019973335787653923, "step": 365 }, { "clip_ratio/high_max": 0.013333333656191826, "clip_ratio/high_mean": 0.008929580450057983, "clip_ratio/low_mean": 0.005534167401492596, "clip_ratio/low_min": 0.002723735524341464, "clip_ratio/region_mean": 0.014463746920228004, "entropy": 0.47808384895324707, "epoch": 2.506849315068493, "grad_norm": 1.3023442171845983, "kl": 0.33683428168296814, "learning_rate": 4.375e-07, "loss": 0.0097, "step": 366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 393.1964416503906, "completions/mean_terminated_length": 393.1964416503906, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.47866135835647583, "epoch": 2.5136986301369864, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3593732571957302, "kl": 0.28751030564308167, "learning_rate": 4.3732876712328764e-07, "loss": 0.0173, "num_tokens": 6364428.0, "reward": 0.5641574859619141, "reward_std": 0.054378632456064224, "rewards/check_gptzero_func/mean": 0.5641574859619141, "rewards/check_gptzero_func/std": 0.30892232060432434, "sampling/importance_sampling_ratio/max": 1.7554733753204346, "sampling/importance_sampling_ratio/mean": 0.9997410178184509, "sampling/importance_sampling_ratio/min": 0.4890579581260681, "sampling/sampling_logp_difference/max": 0.7152743339538574, "sampling/sampling_logp_difference/mean": 0.019452985376119614, "step": 367 }, { "clip_ratio/high_max": 0.014705882407724857, "clip_ratio/high_mean": 0.010316318832337856, "clip_ratio/low_mean": 0.006715133786201477, "clip_ratio/low_min": 0.0022870211396366358, "clip_ratio/region_mean": 0.01703145168721676, "entropy": 0.4795633852481842, "epoch": 2.5205479452054793, "grad_norm": 1.231283931685333, "kl": 0.2826596200466156, "learning_rate": 4.371575342465753e-07, "loss": 0.0109, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 339.25, "completions/mean_terminated_length": 339.25, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.4881637692451477, "epoch": 2.5273972602739727, "frac_reward_zero_std": 0.0, "grad_norm": 1.5329768669902646, "kl": 0.35909172892570496, "learning_rate": 4.36986301369863e-07, "loss": -0.0029, "num_tokens": 6388472.0, "reward": 0.5478863716125488, "reward_std": 0.029670462012290955, "rewards/check_gptzero_func/mean": 0.547886312007904, "rewards/check_gptzero_func/std": 0.23360289633274078, "sampling/importance_sampling_ratio/max": 1.6889640092849731, "sampling/importance_sampling_ratio/mean": 0.9993424415588379, "sampling/importance_sampling_ratio/min": 0.5343242883682251, "sampling/sampling_logp_difference/max": 0.6267523765563965, "sampling/sampling_logp_difference/mean": 0.02069867216050625, "step": 369 }, { "clip_ratio/high_max": 0.014971605502068996, "clip_ratio/high_mean": 0.011811417527496815, "clip_ratio/low_mean": 0.00950830988585949, "clip_ratio/low_min": 0.0063965884037315845, "clip_ratio/region_mean": 0.02131972648203373, "entropy": 0.48693135380744934, "epoch": 2.5342465753424657, "grad_norm": 1.2926820990833279, "kl": 0.3690389096736908, "learning_rate": 4.3681506849315065e-07, "loss": -0.0099, "step": 370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 371.8750305175781, "completions/mean_terminated_length": 371.8750305175781, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.5005078315734863, "epoch": 2.541095890410959, "frac_reward_zero_std": 0.0, "grad_norm": 1.5773805773089846, "kl": 0.32892486453056335, "learning_rate": 4.3664383561643835e-07, "loss": -0.0146, "num_tokens": 6413613.0, "reward": 0.4968803822994232, "reward_std": 0.06441093981266022, "rewards/check_gptzero_func/mean": 0.49688035249710083, "rewards/check_gptzero_func/std": 0.27004703879356384, "sampling/importance_sampling_ratio/max": 1.614837646484375, "sampling/importance_sampling_ratio/mean": 1.0006517171859741, "sampling/importance_sampling_ratio/min": 0.3154252767562866, "sampling/sampling_logp_difference/max": 1.1538333892822266, "sampling/sampling_logp_difference/mean": 0.020913904532790184, "step": 371 }, { "clip_ratio/high_max": 0.015599342994391918, "clip_ratio/high_mean": 0.012486735358834267, "clip_ratio/low_mean": 0.007952644489705563, "clip_ratio/low_min": 0.003559870645403862, "clip_ratio/region_mean": 0.020439380779862404, "entropy": 0.5018924474716187, "epoch": 2.547945205479452, "grad_norm": 1.6319713338210189, "kl": 0.3210611939430237, "learning_rate": 4.36472602739726e-07, "loss": -0.0211, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1561.0, "completions/max_terminated_length": 1561.0, "completions/mean_length": 446.21429443359375, "completions/mean_terminated_length": 446.21429443359375, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.5236569046974182, "epoch": 2.5547945205479454, "frac_reward_zero_std": 0.0, "grad_norm": 1.4144231916586258, "kl": 0.36931848526000977, "learning_rate": 4.363013698630137e-07, "loss": 0.0038, "num_tokens": 6442953.0, "reward": 0.5188042521476746, "reward_std": 0.07135093957185745, "rewards/check_gptzero_func/mean": 0.5188042521476746, "rewards/check_gptzero_func/std": 0.24359619617462158, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0004427433013916, "sampling/importance_sampling_ratio/min": 0.585580587387085, "sampling/sampling_logp_difference/max": 1.5666821002960205, "sampling/sampling_logp_difference/mean": 0.021035242825746536, "step": 373 }, { "clip_ratio/high_max": 0.013458950445055962, "clip_ratio/high_mean": 0.009640119038522243, "clip_ratio/low_mean": 0.006436933763325214, "clip_ratio/low_min": 0.0031955689191818237, "clip_ratio/region_mean": 0.016077052801847458, "entropy": 0.5266221165657043, "epoch": 2.5616438356164384, "grad_norm": 118.01262462285369, "kl": 3.699385404586792, "learning_rate": 4.3613013698630136e-07, "loss": 0.01, "step": 374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 955.0, "completions/max_terminated_length": 955.0, "completions/mean_length": 267.33929443359375, "completions/mean_terminated_length": 267.33929443359375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.4042033553123474, "epoch": 2.5684931506849313, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.8630697340868292, "kl": 0.40929198265075684, "learning_rate": 4.35958904109589e-07, "loss": -0.0155, "num_tokens": 6462578.0, "reward": 0.6429793238639832, "reward_std": 0.09341376274824142, "rewards/check_gptzero_func/mean": 0.6429792642593384, "rewards/check_gptzero_func/std": 0.28800803422927856, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0003482103347778, "sampling/importance_sampling_ratio/min": 0.6279610395431519, "sampling/sampling_logp_difference/max": 0.7109262943267822, "sampling/sampling_logp_difference/mean": 0.01746862754225731, "step": 375 }, { "clip_ratio/high_max": 0.017605634406208992, "clip_ratio/high_mean": 0.011141620576381683, "clip_ratio/low_mean": 0.008619687519967556, "clip_ratio/low_min": 0.006631928030401468, "clip_ratio/region_mean": 0.019761307165026665, "entropy": 0.40502843260765076, "epoch": 2.5753424657534247, "grad_norm": 1.5301863529767767, "kl": 0.4053165018558502, "learning_rate": 4.357876712328767e-07, "loss": -0.0229, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 369.6071472167969, "completions/mean_terminated_length": 369.6071472167969, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.48452886939048767, "epoch": 2.5821917808219177, "frac_reward_zero_std": 0.0, "grad_norm": 1.4925889408931496, "kl": 0.351786345243454, "learning_rate": 4.3561643835616436e-07, "loss": 0.0049, "num_tokens": 6488332.0, "reward": 0.5272718667984009, "reward_std": 0.06169571354985237, "rewards/check_gptzero_func/mean": 0.5272718071937561, "rewards/check_gptzero_func/std": 0.24997250735759735, "sampling/importance_sampling_ratio/max": 1.98772394657135, "sampling/importance_sampling_ratio/mean": 1.000091314315796, "sampling/importance_sampling_ratio/min": 0.41695865988731384, "sampling/sampling_logp_difference/max": 0.8747682571411133, "sampling/sampling_logp_difference/mean": 0.020543448626995087, "step": 377 }, { "clip_ratio/high_max": 0.01343570090830326, "clip_ratio/high_mean": 0.01032219361513853, "clip_ratio/low_mean": 0.007150203920900822, "clip_ratio/low_min": 0.005576208233833313, "clip_ratio/region_mean": 0.017472397536039352, "entropy": 0.485281765460968, "epoch": 2.589041095890411, "grad_norm": 1.2553373107290446, "kl": 0.3495767116546631, "learning_rate": 4.35445205479452e-07, "loss": -0.0023, "step": 378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1058.0, "completions/max_terminated_length": 1058.0, "completions/mean_length": 403.3750305175781, "completions/mean_terminated_length": 403.3750305175781, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.5281578302383423, "epoch": 2.595890410958904, "frac_reward_zero_std": 0.0, "grad_norm": 1.4790594806910717, "kl": 0.31475380063056946, "learning_rate": 4.3527397260273966e-07, "loss": -0.0008, "num_tokens": 6515351.0, "reward": 0.5306413173675537, "reward_std": 0.056056827306747437, "rewards/check_gptzero_func/mean": 0.5306413173675537, "rewards/check_gptzero_func/std": 0.21112152934074402, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000322699546814, "sampling/importance_sampling_ratio/min": 0.5805408954620361, "sampling/sampling_logp_difference/max": 0.756749153137207, "sampling/sampling_logp_difference/mean": 0.02085353247821331, "step": 379 }, { "clip_ratio/high_max": 0.013264869339764118, "clip_ratio/high_mean": 0.010631253011524677, "clip_ratio/low_mean": 0.008228059858083725, "clip_ratio/low_min": 0.006145741790533066, "clip_ratio/region_mean": 0.018859311938285828, "entropy": 0.529710590839386, "epoch": 2.602739726027397, "grad_norm": 1.2422149874808728, "kl": 0.30564355850219727, "learning_rate": 4.351027397260274e-07, "loss": -0.0076, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1278.0, "completions/max_terminated_length": 1278.0, "completions/mean_length": 393.9107360839844, "completions/mean_terminated_length": 393.9107360839844, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "entropy": 0.4541463851928711, "epoch": 2.6095890410958904, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5402802453269042, "kl": 0.4806044399738312, "learning_rate": 4.3493150684931507e-07, "loss": -0.0045, "num_tokens": 6542770.0, "reward": 0.48014914989471436, "reward_std": 0.09837791323661804, "rewards/check_gptzero_func/mean": 0.48014912009239197, "rewards/check_gptzero_func/std": 0.21812398731708527, "sampling/importance_sampling_ratio/max": 1.9876798391342163, "sampling/importance_sampling_ratio/mean": 0.999699592590332, "sampling/importance_sampling_ratio/min": 0.620280385017395, "sampling/sampling_logp_difference/max": 0.6869680881500244, "sampling/sampling_logp_difference/mean": 0.019307933747768402, "step": 381 }, { "clip_ratio/high_max": 0.011716276407241821, "clip_ratio/high_mean": 0.010299480520188808, "clip_ratio/low_mean": 0.008481173776090145, "clip_ratio/low_min": 0.0043798526749014854, "clip_ratio/region_mean": 0.018780652433633804, "entropy": 0.45596280694007874, "epoch": 2.616438356164384, "grad_norm": 961.8508442732921, "kl": 54.84061050415039, "learning_rate": 4.347602739726027e-07, "loss": 0.2168, "step": 382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 499.607177734375, "completions/mean_terminated_length": 499.607177734375, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 0.5270594358444214, "epoch": 2.6232876712328768, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.236112062546048, "kl": 0.341948926448822, "learning_rate": 4.345890410958904e-07, "loss": -0.0047, "num_tokens": 6575380.0, "reward": 0.5320324301719666, "reward_std": 0.08044116944074631, "rewards/check_gptzero_func/mean": 0.5320323705673218, "rewards/check_gptzero_func/std": 0.2787308096885681, "sampling/importance_sampling_ratio/max": 1.8467936515808105, "sampling/importance_sampling_ratio/mean": 0.9999110102653503, "sampling/importance_sampling_ratio/min": 0.4389420747756958, "sampling/sampling_logp_difference/max": 0.823387861251831, "sampling/sampling_logp_difference/mean": 0.020454224199056625, "step": 383 }, { "clip_ratio/high_max": 0.01015939749777317, "clip_ratio/high_mean": 0.007409415673464537, "clip_ratio/low_mean": 0.007057120557874441, "clip_ratio/low_min": 0.0049778763204813, "clip_ratio/region_mean": 0.014466537162661552, "entropy": 0.525065541267395, "epoch": 2.6301369863013697, "grad_norm": 1.0898623919668384, "kl": 0.34138408303260803, "learning_rate": 4.3441780821917807e-07, "loss": -0.0112, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 330.5714416503906, "completions/mean_terminated_length": 330.5714416503906, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.47385916113853455, "epoch": 2.636986301369863, "frac_reward_zero_std": 0.0, "grad_norm": 1.5995675848667346, "kl": 0.38097307085990906, "learning_rate": 4.342465753424657e-07, "loss": 0.0172, "num_tokens": 6598586.0, "reward": 0.5006964802742004, "reward_std": 0.05070007964968681, "rewards/check_gptzero_func/mean": 0.5006964802742004, "rewards/check_gptzero_func/std": 0.3348168730735779, "sampling/importance_sampling_ratio/max": 1.609968900680542, "sampling/importance_sampling_ratio/mean": 0.9995049238204956, "sampling/importance_sampling_ratio/min": 0.4159579873085022, "sampling/sampling_logp_difference/max": 0.8771710395812988, "sampling/sampling_logp_difference/mean": 0.01950969733297825, "step": 385 }, { "clip_ratio/high_max": 0.016904383897781372, "clip_ratio/high_mean": 0.012360075488686562, "clip_ratio/low_mean": 0.008708975277841091, "clip_ratio/low_min": 0.005892255809158087, "clip_ratio/region_mean": 0.021069049835205078, "entropy": 0.47749337553977966, "epoch": 2.643835616438356, "grad_norm": 1.5771168840460084, "kl": 0.3566076159477234, "learning_rate": 4.3407534246575337e-07, "loss": 0.0103, "step": 386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 433.6964416503906, "completions/mean_terminated_length": 433.6964416503906, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.46869978308677673, "epoch": 2.6506849315068495, "frac_reward_zero_std": 0.0, "grad_norm": 1.522183818189874, "kl": 0.4806085526943207, "learning_rate": 4.339041095890411e-07, "loss": 0.0055, "num_tokens": 6627967.0, "reward": 0.47637948393821716, "reward_std": 0.09981108456850052, "rewards/check_gptzero_func/mean": 0.4763794541358948, "rewards/check_gptzero_func/std": 0.2359376847743988, "sampling/importance_sampling_ratio/max": 1.725730299949646, "sampling/importance_sampling_ratio/mean": 0.9997712969779968, "sampling/importance_sampling_ratio/min": 0.39192110300064087, "sampling/sampling_logp_difference/max": 0.9366947412490845, "sampling/sampling_logp_difference/mean": 0.01944786123931408, "step": 387 }, { "clip_ratio/high_max": 0.01673228293657303, "clip_ratio/high_mean": 0.010520197451114655, "clip_ratio/low_mean": 0.006667367182672024, "clip_ratio/low_min": 0.0029519014060497284, "clip_ratio/region_mean": 0.017187563702464104, "entropy": 0.46980923414230347, "epoch": 2.6575342465753424, "grad_norm": 1.2249307349830156, "kl": 0.4251388907432556, "learning_rate": 4.337328767123287e-07, "loss": -0.001, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 347.3035888671875, "completions/mean_terminated_length": 347.3035888671875, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.4607089161872864, "epoch": 2.6643835616438354, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.394855574968904, "kl": 0.40306344628334045, "learning_rate": 4.3356164383561643e-07, "loss": 0.0111, "num_tokens": 6652428.0, "reward": 0.4400685429573059, "reward_std": 0.04976855218410492, "rewards/check_gptzero_func/mean": 0.4400685131549835, "rewards/check_gptzero_func/std": 0.2714260518550873, "sampling/importance_sampling_ratio/max": 1.6308224201202393, "sampling/importance_sampling_ratio/mean": 1.0006357431411743, "sampling/importance_sampling_ratio/min": 0.47405391931533813, "sampling/sampling_logp_difference/max": 0.746434211730957, "sampling/sampling_logp_difference/mean": 0.01993658021092415, "step": 389 }, { "clip_ratio/high_max": 0.013133208267390728, "clip_ratio/high_mean": 0.010734798386693, "clip_ratio/low_mean": 0.00639480771496892, "clip_ratio/low_min": 0.0032715376000851393, "clip_ratio/region_mean": 0.017129605636000633, "entropy": 0.4612634778022766, "epoch": 2.671232876712329, "grad_norm": 1.1976648823125642, "kl": 0.4074338376522064, "learning_rate": 4.3339041095890413e-07, "loss": 0.0042, "step": 390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 955.0, "completions/max_terminated_length": 955.0, "completions/mean_length": 347.3035888671875, "completions/mean_terminated_length": 347.3035888671875, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.41801318526268005, "epoch": 2.678082191780822, "frac_reward_zero_std": 0.0, "grad_norm": 2.4909456078768133, "kl": 0.5542351007461548, "learning_rate": 4.332191780821918e-07, "loss": -0.0107, "num_tokens": 6676331.0, "reward": 0.5502007603645325, "reward_std": 0.07618315517902374, "rewards/check_gptzero_func/mean": 0.5502007603645325, "rewards/check_gptzero_func/std": 0.29640379548072815, "sampling/importance_sampling_ratio/max": 1.8083469867706299, "sampling/importance_sampling_ratio/mean": 0.9998525977134705, "sampling/importance_sampling_ratio/min": 0.4954153895378113, "sampling/sampling_logp_difference/max": 0.7023587226867676, "sampling/sampling_logp_difference/mean": 0.018922410905361176, "step": 391 }, { "clip_ratio/high_max": 0.015028177760541439, "clip_ratio/high_mean": 0.009631991386413574, "clip_ratio/low_mean": 0.006406615022569895, "clip_ratio/low_min": 0.004018754232674837, "clip_ratio/region_mean": 0.01603860594332218, "entropy": 0.4212682545185089, "epoch": 2.684931506849315, "grad_norm": 35.52232683292939, "kl": 0.39291253685951233, "learning_rate": 4.3304794520547943e-07, "loss": 0.0117, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1106.0, "completions/max_terminated_length": 1106.0, "completions/mean_length": 357.21429443359375, "completions/mean_terminated_length": 357.21429443359375, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.44672590494155884, "epoch": 2.691780821917808, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5414951248744748, "kl": 0.4069688618183136, "learning_rate": 4.328767123287671e-07, "loss": 0.0045, "num_tokens": 6701481.0, "reward": 0.6532638669013977, "reward_std": 0.04305213317275047, "rewards/check_gptzero_func/mean": 0.6532638669013977, "rewards/check_gptzero_func/std": 0.24861180782318115, "sampling/importance_sampling_ratio/max": 1.8179854154586792, "sampling/importance_sampling_ratio/mean": 0.9999054670333862, "sampling/importance_sampling_ratio/min": 0.4161491394042969, "sampling/sampling_logp_difference/max": 0.8767116069793701, "sampling/sampling_logp_difference/mean": 0.017541101202368736, "step": 393 }, { "clip_ratio/high_max": 0.013859722763299942, "clip_ratio/high_mean": 0.008706537075340748, "clip_ratio/low_mean": 0.005450035445392132, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.014156573452055454, "entropy": 0.44728904962539673, "epoch": 2.6986301369863015, "grad_norm": 1.3106062865110768, "kl": 0.40555137395858765, "learning_rate": 4.327054794520548e-07, "loss": -0.0026, "step": 394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 361.8750305175781, "completions/mean_terminated_length": 361.8750305175781, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.40999987721443176, "epoch": 2.7054794520547945, "frac_reward_zero_std": 0.0, "grad_norm": 1.516475889167668, "kl": 0.3939971625804901, "learning_rate": 4.3253424657534244e-07, "loss": 0.0142, "num_tokens": 6726502.0, "reward": 0.5073835849761963, "reward_std": 0.0931466668844223, "rewards/check_gptzero_func/mean": 0.5073835253715515, "rewards/check_gptzero_func/std": 0.28147372603416443, "sampling/importance_sampling_ratio/max": 1.8588435649871826, "sampling/importance_sampling_ratio/mean": 0.9997685551643372, "sampling/importance_sampling_ratio/min": 0.6026296019554138, "sampling/sampling_logp_difference/max": 0.6199545860290527, "sampling/sampling_logp_difference/mean": 0.018392082303762436, "step": 395 }, { "clip_ratio/high_max": 0.016189290210604668, "clip_ratio/high_mean": 0.011229202151298523, "clip_ratio/low_mean": 0.0076585812494158745, "clip_ratio/low_min": 0.0038801466580480337, "clip_ratio/region_mean": 0.018887784332036972, "entropy": 0.4097093939781189, "epoch": 2.712328767123288, "grad_norm": 1.247765036415858, "kl": 0.3966979384422302, "learning_rate": 4.323630136986301e-07, "loss": 0.0072, "step": 396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1014.0, "completions/max_terminated_length": 1014.0, "completions/mean_length": 334.9821472167969, "completions/mean_terminated_length": 334.9821472167969, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.4599270224571228, "epoch": 2.719178082191781, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.577214329508718, "kl": 0.378954142332077, "learning_rate": 4.3219178082191774e-07, "loss": 0.0189, "num_tokens": 6750107.0, "reward": 0.5170041918754578, "reward_std": 0.07116836309432983, "rewards/check_gptzero_func/mean": 0.5170041918754578, "rewards/check_gptzero_func/std": 0.22698760032653809, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0000053644180298, "sampling/importance_sampling_ratio/min": 0.629992663860321, "sampling/sampling_logp_difference/max": 0.7048373222351074, "sampling/sampling_logp_difference/mean": 0.019719120115041733, "step": 397 }, { "clip_ratio/high_max": 0.014973694458603859, "clip_ratio/high_mean": 0.010533074848353863, "clip_ratio/low_mean": 0.008765713311731815, "clip_ratio/low_min": 0.00485633360221982, "clip_ratio/region_mean": 0.019298788160085678, "entropy": 0.46044787764549255, "epoch": 2.7260273972602738, "grad_norm": 1.3036625478191632, "kl": 0.37954407930374146, "learning_rate": 4.320205479452055e-07, "loss": 0.0114, "step": 398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 363.96429443359375, "completions/mean_terminated_length": 363.96429443359375, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.5096673369407654, "epoch": 2.732876712328767, "frac_reward_zero_std": 0.0, "grad_norm": 1.7297390200657456, "kl": 0.443392276763916, "learning_rate": 4.3184931506849314e-07, "loss": -0.0137, "num_tokens": 6775905.0, "reward": 0.5720414519309998, "reward_std": 0.06845783442258835, "rewards/check_gptzero_func/mean": 0.5720415115356445, "rewards/check_gptzero_func/std": 0.3069859743118286, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9995999932289124, "sampling/importance_sampling_ratio/min": 0.4866352081298828, "sampling/sampling_logp_difference/max": 0.8505103588104248, "sampling/sampling_logp_difference/mean": 0.02016056329011917, "step": 399 }, { "clip_ratio/high_max": 0.02180028147995472, "clip_ratio/high_mean": 0.012681333348155022, "clip_ratio/low_mean": 0.009957070462405682, "clip_ratio/low_min": 0.006829524412751198, "clip_ratio/region_mean": 0.022638404741883278, "entropy": 0.5155047178268433, "epoch": 2.73972602739726, "grad_norm": 1.5666626177586331, "kl": 0.3678712248802185, "learning_rate": 4.316780821917808e-07, "loss": -0.0209, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 953.0, "completions/max_terminated_length": 953.0, "completions/mean_length": 204.8928680419922, "completions/mean_terminated_length": 204.8928680419922, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.3398292362689972, "epoch": 2.7465753424657535, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.0925639197246864, "kl": 0.6407204866409302, "learning_rate": 4.315068493150685e-07, "loss": 0.0036, "num_tokens": 6792265.0, "reward": 0.575263500213623, "reward_std": 0.06375692039728165, "rewards/check_gptzero_func/mean": 0.575263500213623, "rewards/check_gptzero_func/std": 0.23549999296665192, "sampling/importance_sampling_ratio/max": 1.775329828262329, "sampling/importance_sampling_ratio/mean": 0.9998295903205872, "sampling/importance_sampling_ratio/min": 0.3969041109085083, "sampling/sampling_logp_difference/max": 0.924060583114624, "sampling/sampling_logp_difference/mean": 0.017583593726158142, "step": 401 }, { "clip_ratio/high_max": 0.017325017601251602, "clip_ratio/high_mean": 0.014203870669007301, "clip_ratio/low_mean": 0.0130988834425807, "clip_ratio/low_min": 0.008053691126406193, "clip_ratio/region_mean": 0.027302755042910576, "entropy": 0.34016117453575134, "epoch": 2.7534246575342465, "grad_norm": 1.6292870535202013, "kl": 0.6470683217048645, "learning_rate": 4.3133561643835615e-07, "loss": -0.0046, "step": 402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2660.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 289.46429443359375, "completions/mean_terminated_length": 289.46429443359375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.4366687834262848, "epoch": 2.76027397260274, "frac_reward_zero_std": 0.0, "grad_norm": 1.698603875233388, "kl": 0.510408341884613, "learning_rate": 4.311643835616438e-07, "loss": -0.0649, "num_tokens": 6812937.0, "reward": 0.6191948652267456, "reward_std": 0.07697727531194687, "rewards/check_gptzero_func/mean": 0.6191948652267456, "rewards/check_gptzero_func/std": 0.281314879655838, "sampling/importance_sampling_ratio/max": 1.6539686918258667, "sampling/importance_sampling_ratio/mean": 1.0001378059387207, "sampling/importance_sampling_ratio/min": 0.47059664130210876, "sampling/sampling_logp_difference/max": 0.7537540197372437, "sampling/sampling_logp_difference/mean": 0.018406502902507782, "step": 403 }, { "clip_ratio/high_max": 0.017467249184846878, "clip_ratio/high_mean": 0.012923507951200008, "clip_ratio/low_mean": 0.009067223407328129, "clip_ratio/low_min": 0.0012027904158458114, "clip_ratio/region_mean": 0.021990731358528137, "entropy": 0.4362383186817169, "epoch": 2.767123287671233, "grad_norm": 1.4406381691211423, "kl": 0.5335387587547302, "learning_rate": 4.3099315068493145e-07, "loss": -0.0726, "step": 404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1532.0, "completions/max_terminated_length": 1532.0, "completions/mean_length": 436.0535888671875, "completions/mean_terminated_length": 436.0535888671875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.5456203818321228, "epoch": 2.7739726027397262, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 5.588509054218288, "kl": 0.4990587532520294, "learning_rate": 4.3082191780821915e-07, "loss": 0.0087, "num_tokens": 6842144.0, "reward": 0.5128518342971802, "reward_std": 0.0734994113445282, "rewards/check_gptzero_func/mean": 0.5128517746925354, "rewards/check_gptzero_func/std": 0.2513873279094696, "sampling/importance_sampling_ratio/max": 1.6308645009994507, "sampling/importance_sampling_ratio/mean": 0.9994519948959351, "sampling/importance_sampling_ratio/min": 0.4982585310935974, "sampling/sampling_logp_difference/max": 0.6966361999511719, "sampling/sampling_logp_difference/mean": 0.020409222692251205, "step": 405 }, { "clip_ratio/high_max": 0.006661732215434313, "clip_ratio/high_mean": 0.004470824263989925, "clip_ratio/low_mean": 0.002008886309340596, "clip_ratio/low_min": 0.0004161464748904109, "clip_ratio/region_mean": 0.006479710340499878, "entropy": 0.5476876497268677, "epoch": 2.780821917808219, "grad_norm": 37.71423119918904, "kl": 0.29943227767944336, "learning_rate": 4.306506849315068e-07, "loss": 0.0856, "step": 406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 280.75, "completions/mean_terminated_length": 280.75, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.3436446189880371, "epoch": 2.787671232876712, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8147888660138847, "kl": 0.5896466970443726, "learning_rate": 4.304794520547945e-07, "loss": -0.0404, "num_tokens": 6863106.0, "reward": 0.45376843214035034, "reward_std": 0.1481827348470688, "rewards/check_gptzero_func/mean": 0.45376846194267273, "rewards/check_gptzero_func/std": 0.3264678716659546, "sampling/importance_sampling_ratio/max": 1.5619797706604004, "sampling/importance_sampling_ratio/mean": 0.9990598559379578, "sampling/importance_sampling_ratio/min": 0.24553647637367249, "sampling/sampling_logp_difference/max": 1.4043097496032715, "sampling/sampling_logp_difference/mean": 0.017589351162314415, "step": 407 }, { "clip_ratio/high_max": 0.018375815823674202, "clip_ratio/high_mean": 0.015051992610096931, "clip_ratio/low_mean": 0.010563167743384838, "clip_ratio/low_min": 0.006162465084344149, "clip_ratio/region_mean": 0.025615159422159195, "entropy": 0.34669017791748047, "epoch": 2.7945205479452055, "grad_norm": 1.6210836562319118, "kl": 0.5651851296424866, "learning_rate": 4.303082191780822e-07, "loss": -0.0482, "step": 408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 360.7321472167969, "completions/mean_terminated_length": 360.7321472167969, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.48185867071151733, "epoch": 2.8013698630136985, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 3.1659219714606768, "kl": 0.5757498145103455, "learning_rate": 4.3013698630136986e-07, "loss": 0.0049, "num_tokens": 6887679.0, "reward": 0.5527956485748291, "reward_std": 0.07086410373449326, "rewards/check_gptzero_func/mean": 0.5527956485748291, "rewards/check_gptzero_func/std": 0.18975569307804108, "sampling/importance_sampling_ratio/max": 1.9366048574447632, "sampling/importance_sampling_ratio/mean": 1.000245451927185, "sampling/importance_sampling_ratio/min": 0.15420038998126984, "sampling/sampling_logp_difference/max": 1.869502305984497, "sampling/sampling_logp_difference/mean": 0.02080817148089409, "step": 409 }, { "clip_ratio/high_max": 0.01611047238111496, "clip_ratio/high_mean": 0.012086373753845692, "clip_ratio/low_mean": 0.010080897249281406, "clip_ratio/low_min": 0.004274334758520126, "clip_ratio/region_mean": 0.02216726914048195, "entropy": 0.4874576926231384, "epoch": 2.808219178082192, "grad_norm": 1.8680472915604787, "kl": 0.4244578778743744, "learning_rate": 4.299657534246575e-07, "loss": -0.0005, "step": 410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1071.0, "completions/max_terminated_length": 1071.0, "completions/mean_length": 309.08929443359375, "completions/mean_terminated_length": 309.08929443359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.40117019414901733, "epoch": 2.815068493150685, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.335369249722214, "kl": 0.6320360898971558, "learning_rate": 4.2979452054794516e-07, "loss": -0.0039, "num_tokens": 6909792.0, "reward": 0.4469018280506134, "reward_std": 0.12168833613395691, "rewards/check_gptzero_func/mean": 0.446901798248291, "rewards/check_gptzero_func/std": 0.26492515206336975, "sampling/importance_sampling_ratio/max": 1.8226317167282104, "sampling/importance_sampling_ratio/mean": 1.0001661777496338, "sampling/importance_sampling_ratio/min": 0.5536726117134094, "sampling/sampling_logp_difference/max": 0.6002814769744873, "sampling/sampling_logp_difference/mean": 0.018712449818849564, "step": 411 }, { "clip_ratio/high_max": 0.018598616123199463, "clip_ratio/high_mean": 0.014899328351020813, "clip_ratio/low_mean": 0.011242958717048168, "clip_ratio/low_min": 0.005900620948523283, "clip_ratio/region_mean": 0.026142286136746407, "entropy": 0.40553173422813416, "epoch": 2.821917808219178, "grad_norm": 14.654467351644822, "kl": 0.548961341381073, "learning_rate": 4.2962328767123286e-07, "loss": -0.0015, "step": 412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1017.0, "completions/max_terminated_length": 1017.0, "completions/mean_length": 319.58929443359375, "completions/mean_terminated_length": 319.58929443359375, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.4400959014892578, "epoch": 2.828767123287671, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.7377028642428456, "kl": 0.45433083176612854, "learning_rate": 4.294520547945205e-07, "loss": 0.01, "num_tokens": 6932755.0, "reward": 0.5458387732505798, "reward_std": 0.06680107861757278, "rewards/check_gptzero_func/mean": 0.5458387732505798, "rewards/check_gptzero_func/std": 0.2765081524848938, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0003979206085205, "sampling/importance_sampling_ratio/min": 0.3941422700881958, "sampling/sampling_logp_difference/max": 0.9310433864593506, "sampling/sampling_logp_difference/mean": 0.01840110495686531, "step": 413 }, { "clip_ratio/high_max": 0.018324607983231544, "clip_ratio/high_mean": 0.0134058753028512, "clip_ratio/low_mean": 0.009672461077570915, "clip_ratio/low_min": 0.006601941771805286, "clip_ratio/region_mean": 0.02307833731174469, "entropy": 0.4435916543006897, "epoch": 2.8356164383561646, "grad_norm": 1.797911820319638, "kl": 0.440229207277298, "learning_rate": 4.2928082191780817e-07, "loss": 0.0034, "step": 414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 907.0, "completions/max_terminated_length": 907.0, "completions/mean_length": 289.875, "completions/mean_terminated_length": 289.875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.44761160016059875, "epoch": 2.8424657534246576, "frac_reward_zero_std": 0.0, "grad_norm": 1.6464956423734713, "kl": 0.48968005180358887, "learning_rate": 4.291095890410959e-07, "loss": -0.0124, "num_tokens": 6953706.0, "reward": 0.48406508564949036, "reward_std": 0.09204082190990448, "rewards/check_gptzero_func/mean": 0.48406505584716797, "rewards/check_gptzero_func/std": 0.23440155386924744, "sampling/importance_sampling_ratio/max": 1.7066946029663086, "sampling/importance_sampling_ratio/mean": 0.9999911189079285, "sampling/importance_sampling_ratio/min": 0.5006792545318604, "sampling/sampling_logp_difference/max": 0.6917896270751953, "sampling/sampling_logp_difference/mean": 0.02005869150161743, "step": 415 }, { "clip_ratio/high_max": 0.01672535203397274, "clip_ratio/high_mean": 0.012486062943935394, "clip_ratio/low_mean": 0.011432552710175514, "clip_ratio/low_min": 0.008739650249481201, "clip_ratio/region_mean": 0.02391861565411091, "entropy": 0.44617152214050293, "epoch": 2.8493150684931505, "grad_norm": 1.5176196106545599, "kl": 0.49294087290763855, "learning_rate": 4.2893835616438357e-07, "loss": -0.0204, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1506.0, "completions/max_terminated_length": 1506.0, "completions/mean_length": 421.6964416503906, "completions/mean_terminated_length": 421.6964416503906, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.4881231188774109, "epoch": 2.856164383561644, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4689672635783275, "kl": 0.40515750646591187, "learning_rate": 4.287671232876712e-07, "loss": 0.0026, "num_tokens": 6982689.0, "reward": 0.44296032190322876, "reward_std": 0.07812861353158951, "rewards/check_gptzero_func/mean": 0.44296029210090637, "rewards/check_gptzero_func/std": 0.27886784076690674, "sampling/importance_sampling_ratio/max": 1.9378750324249268, "sampling/importance_sampling_ratio/mean": 1.0002528429031372, "sampling/importance_sampling_ratio/min": 0.5260621905326843, "sampling/sampling_logp_difference/max": 0.6615920662879944, "sampling/sampling_logp_difference/mean": 0.019924351945519447, "step": 417 }, { "clip_ratio/high_max": 0.014714204706251621, "clip_ratio/high_mean": 0.011151783168315887, "clip_ratio/low_mean": 0.008180915378034115, "clip_ratio/low_min": 0.005079962313175201, "clip_ratio/region_mean": 0.019332697615027428, "entropy": 0.4903147220611572, "epoch": 2.863013698630137, "grad_norm": 9.622933259220977, "kl": 0.3781653940677643, "learning_rate": 4.2859589041095887e-07, "loss": 0.0029, "step": 418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1048.0, "completions/max_terminated_length": 1048.0, "completions/mean_length": 289.8035888671875, "completions/mean_terminated_length": 289.8035888671875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.40226325392723083, "epoch": 2.8698630136986303, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 3.109188676337454, "kl": 0.4938289225101471, "learning_rate": 4.284246575342466e-07, "loss": 0.003, "num_tokens": 7004012.0, "reward": 0.6300808787345886, "reward_std": 0.06856049597263336, "rewards/check_gptzero_func/mean": 0.6300808787345886, "rewards/check_gptzero_func/std": 0.2889097034931183, "sampling/importance_sampling_ratio/max": 1.5285710096359253, "sampling/importance_sampling_ratio/mean": 1.000337839126587, "sampling/importance_sampling_ratio/min": 0.47668424248695374, "sampling/sampling_logp_difference/max": 0.740900993347168, "sampling/sampling_logp_difference/mean": 0.01848239079117775, "step": 419 }, { "clip_ratio/high_max": 0.01727941259741783, "clip_ratio/high_mean": 0.010779326781630516, "clip_ratio/low_mean": 0.007809518836438656, "clip_ratio/low_min": 0.004314477555453777, "clip_ratio/region_mean": 0.018588844686746597, "entropy": 0.40573710203170776, "epoch": 2.8767123287671232, "grad_norm": 1.4040885416744457, "kl": 0.47960060834884644, "learning_rate": 4.282534246575342e-07, "loss": -0.0034, "step": 420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 373.6964416503906, "completions/mean_terminated_length": 373.6964416503906, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.49278396368026733, "epoch": 2.883561643835616, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.5426820936513759, "kl": 0.3736138939857483, "learning_rate": 4.280821917808219e-07, "loss": -0.0114, "num_tokens": 7030219.0, "reward": 0.522388756275177, "reward_std": 0.0557841882109642, "rewards/check_gptzero_func/mean": 0.522388756275177, "rewards/check_gptzero_func/std": 0.3114466071128845, "sampling/importance_sampling_ratio/max": 1.861733078956604, "sampling/importance_sampling_ratio/mean": 0.9999551773071289, "sampling/importance_sampling_ratio/min": 0.5010412931442261, "sampling/sampling_logp_difference/max": 0.6910667419433594, "sampling/sampling_logp_difference/mean": 0.020113438367843628, "step": 421 }, { "clip_ratio/high_max": 0.014097744598984718, "clip_ratio/high_mean": 0.012044970877468586, "clip_ratio/low_mean": 0.007508635520935059, "clip_ratio/low_min": 0.00491055753082037, "clip_ratio/region_mean": 0.01955360546708107, "entropy": 0.49515360593795776, "epoch": 2.8904109589041096, "grad_norm": 1.5777846715574655, "kl": 0.37602630257606506, "learning_rate": 4.279109589041096e-07, "loss": -0.0186, "step": 422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1123.0, "completions/max_terminated_length": 1123.0, "completions/mean_length": 206.62501525878906, "completions/mean_terminated_length": 206.62501525878906, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.3092096745967865, "epoch": 2.897260273972603, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.9459801051706191, "kl": 0.6974611282348633, "learning_rate": 4.2773972602739723e-07, "loss": -0.0098, "num_tokens": 7046540.0, "reward": 0.6350603103637695, "reward_std": 0.06905553489923477, "rewards/check_gptzero_func/mean": 0.6350603103637695, "rewards/check_gptzero_func/std": 0.3005734086036682, "sampling/importance_sampling_ratio/max": 1.663033127784729, "sampling/importance_sampling_ratio/mean": 0.9998154044151306, "sampling/importance_sampling_ratio/min": 0.27278798818588257, "sampling/sampling_logp_difference/max": 1.299060344696045, "sampling/sampling_logp_difference/mean": 0.01615697704255581, "step": 423 }, { "clip_ratio/high_max": 0.02364864945411682, "clip_ratio/high_mean": 0.015598760917782784, "clip_ratio/low_mean": 0.0127498684450984, "clip_ratio/low_min": 0.009309162385761738, "clip_ratio/region_mean": 0.028348630294203758, "entropy": 0.30916184186935425, "epoch": 2.904109589041096, "grad_norm": 1.7442343625644925, "kl": 0.708801805973053, "learning_rate": 4.2756849315068493e-07, "loss": -0.0187, "step": 424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 434.1785888671875, "completions/mean_terminated_length": 434.1785888671875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.4685434401035309, "epoch": 2.910958904109589, "frac_reward_zero_std": 0.0, "grad_norm": 1.3436158820584096, "kl": 0.3609774708747864, "learning_rate": 4.273972602739726e-07, "loss": 0.0091, "num_tokens": 7075840.0, "reward": 0.5702441930770874, "reward_std": 0.05956185609102249, "rewards/check_gptzero_func/mean": 0.5702441930770874, "rewards/check_gptzero_func/std": 0.2732802927494049, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0000801086425781, "sampling/importance_sampling_ratio/min": 0.5384498238563538, "sampling/sampling_logp_difference/max": 1.0377089977264404, "sampling/sampling_logp_difference/mean": 0.019284896552562714, "step": 425 }, { "clip_ratio/high_max": 0.018401937559247017, "clip_ratio/high_mean": 0.010474522598087788, "clip_ratio/low_mean": 0.007852432318031788, "clip_ratio/low_min": 0.004313831217586994, "clip_ratio/region_mean": 0.018326954916119576, "entropy": 0.4673800468444824, "epoch": 2.9178082191780823, "grad_norm": 1.141204628286726, "kl": 0.35842370986938477, "learning_rate": 4.272260273972603e-07, "loss": 0.0026, "step": 426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 298.0, "completions/mean_terminated_length": 298.0, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.4199225604534149, "epoch": 2.9246575342465753, "frac_reward_zero_std": 0.0, "grad_norm": 1.75587278657067, "kl": 0.563929557800293, "learning_rate": 4.2705479452054794e-07, "loss": 0.0181, "num_tokens": 7097196.0, "reward": 0.5130889415740967, "reward_std": 0.06853906065225601, "rewards/check_gptzero_func/mean": 0.5130888819694519, "rewards/check_gptzero_func/std": 0.29080912470817566, "sampling/importance_sampling_ratio/max": 1.755521535873413, "sampling/importance_sampling_ratio/mean": 1.0002448558807373, "sampling/importance_sampling_ratio/min": 0.5019951462745667, "sampling/sampling_logp_difference/max": 0.6891648769378662, "sampling/sampling_logp_difference/mean": 0.018982138484716415, "step": 427 }, { "clip_ratio/high_max": 0.022393282502889633, "clip_ratio/high_mean": 0.012813853099942207, "clip_ratio/low_mean": 0.012152401730418205, "clip_ratio/low_min": 0.006758448202162981, "clip_ratio/region_mean": 0.024966254830360413, "entropy": 0.4166271984577179, "epoch": 2.9315068493150687, "grad_norm": 1.6871655752484005, "kl": 0.56923907995224, "learning_rate": 4.268835616438356e-07, "loss": 0.0097, "step": 428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 913.0, "completions/max_terminated_length": 913.0, "completions/mean_length": 267.1607360839844, "completions/mean_terminated_length": 267.1607360839844, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.39824795722961426, "epoch": 2.9383561643835616, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.69198470483515, "kl": 0.4874275326728821, "learning_rate": 4.2671232876712324e-07, "loss": 0.0213, "num_tokens": 7116771.0, "reward": 0.6018290519714355, "reward_std": 0.058207958936691284, "rewards/check_gptzero_func/mean": 0.6018290519714355, "rewards/check_gptzero_func/std": 0.3072289824485779, "sampling/importance_sampling_ratio/max": 1.6809138059616089, "sampling/importance_sampling_ratio/mean": 1.000004529953003, "sampling/importance_sampling_ratio/min": 0.5956223607063293, "sampling/sampling_logp_difference/max": 0.5193376541137695, "sampling/sampling_logp_difference/mean": 0.018689893186092377, "step": 429 }, { "clip_ratio/high_max": 0.019290603697299957, "clip_ratio/high_mean": 0.014325814321637154, "clip_ratio/low_mean": 0.011122049763798714, "clip_ratio/low_min": 0.007201645988970995, "clip_ratio/region_mean": 0.025447862222790718, "entropy": 0.3974475860595703, "epoch": 2.9452054794520546, "grad_norm": 1.5202527793357892, "kl": 0.5008175373077393, "learning_rate": 4.2654109589041094e-07, "loss": 0.0137, "step": 430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 353.33929443359375, "completions/mean_terminated_length": 353.33929443359375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.47905537486076355, "epoch": 2.952054794520548, "frac_reward_zero_std": 0.0, "grad_norm": 1.5510344707075696, "kl": 0.44882169365882874, "learning_rate": 4.263698630136986e-07, "loss": -0.0026, "num_tokens": 7141168.0, "reward": 0.554107129573822, "reward_std": 0.04225100576877594, "rewards/check_gptzero_func/mean": 0.554107129573822, "rewards/check_gptzero_func/std": 0.2875281572341919, "sampling/importance_sampling_ratio/max": 1.598642110824585, "sampling/importance_sampling_ratio/mean": 1.0002986192703247, "sampling/importance_sampling_ratio/min": 0.3008643388748169, "sampling/sampling_logp_difference/max": 1.2010958194732666, "sampling/sampling_logp_difference/mean": 0.020269697532057762, "step": 431 }, { "clip_ratio/high_max": 0.018807338550686836, "clip_ratio/high_mean": 0.01314165536314249, "clip_ratio/low_mean": 0.011937621049582958, "clip_ratio/low_min": 0.00571805564686656, "clip_ratio/region_mean": 0.02507927641272545, "entropy": 0.480742871761322, "epoch": 2.958904109589041, "grad_norm": 1.5811942320587493, "kl": 0.44215163588523865, "learning_rate": 4.2619863013698624e-07, "loss": -0.0099, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1054.0, "completions/max_terminated_length": 1054.0, "completions/mean_length": 267.39288330078125, "completions/mean_terminated_length": 267.39288330078125, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.3874422609806061, "epoch": 2.9657534246575343, "frac_reward_zero_std": 1.0, "grad_norm": 0.2047587325153792, "kl": 0.5773407816886902, "learning_rate": 4.26027397260274e-07, "loss": 0.0048, "num_tokens": 7160976.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_gptzero_func/mean": 0.0, "rewards/check_gptzero_func/std": 0.0, "sampling/importance_sampling_ratio/max": 1.7556110620498657, "sampling/importance_sampling_ratio/mean": 1.0001012086868286, "sampling/importance_sampling_ratio/min": 0.2598022520542145, "sampling/sampling_logp_difference/max": 1.347834587097168, "sampling/sampling_logp_difference/mean": 0.017744338139891624, "step": 433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "entropy": 0.3944026529788971, "epoch": 2.9726027397260273, "grad_norm": 0.11407193803445272, "kl": 0.5501869916915894, "learning_rate": 4.2585616438356165e-07, "loss": 0.0045, "step": 434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 989.0, "completions/max_terminated_length": 989.0, "completions/mean_length": 240.1785888671875, "completions/mean_terminated_length": 240.1785888671875, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.38908514380455017, "epoch": 2.9794520547945207, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.8666074940837651, "kl": 0.5691503286361694, "learning_rate": 4.256849315068493e-07, "loss": 0.0003, "num_tokens": 7178880.0, "reward": 0.5859895944595337, "reward_std": 0.04599367082118988, "rewards/check_gptzero_func/mean": 0.5859895348548889, "rewards/check_gptzero_func/std": 0.26934877038002014, "sampling/importance_sampling_ratio/max": 1.5919407606124878, "sampling/importance_sampling_ratio/mean": 0.9994714856147766, "sampling/importance_sampling_ratio/min": 0.38623401522636414, "sampling/sampling_logp_difference/max": 0.9513118267059326, "sampling/sampling_logp_difference/mean": 0.01838158816099167, "step": 435 }, { "clip_ratio/high_max": 0.019429264590144157, "clip_ratio/high_mean": 0.014523538760840893, "clip_ratio/low_mean": 0.014315664768218994, "clip_ratio/low_min": 0.007720588240772486, "clip_ratio/region_mean": 0.028839200735092163, "entropy": 0.3858567178249359, "epoch": 2.9863013698630136, "grad_norm": 2.459150934248252, "kl": 0.5675062537193298, "learning_rate": 4.2551369863013695e-07, "loss": -0.0076, "step": 436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 347.0535888671875, "completions/mean_terminated_length": 347.0535888671875, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.45681023597717285, "epoch": 2.993150684931507, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.5111914048062371, "kl": 0.4579731523990631, "learning_rate": 4.2534246575342465e-07, "loss": -0.0233, "num_tokens": 7203133.0, "reward": 0.5753355026245117, "reward_std": 0.039778079837560654, "rewards/check_gptzero_func/mean": 0.5753355026245117, "rewards/check_gptzero_func/std": 0.2716562747955322, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996462464332581, "sampling/importance_sampling_ratio/min": 0.5272684693336487, "sampling/sampling_logp_difference/max": 1.2095816135406494, "sampling/sampling_logp_difference/mean": 0.019139215350151062, "step": 437 }, { "clip_ratio/high_max": 0.014617690816521645, "clip_ratio/high_mean": 0.010865802876651287, "clip_ratio/low_mean": 0.006687819957733154, "clip_ratio/low_min": 0.0049019609577953815, "clip_ratio/region_mean": 0.017553623765707016, "entropy": 0.4564935266971588, "epoch": 3.0, "grad_norm": 1.2769984830109335, "kl": 0.4537303149700165, "learning_rate": 4.251712328767123e-07, "loss": -0.0314, "step": 438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 326.71429443359375, "completions/mean_terminated_length": 326.71429443359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.4878480136394501, "epoch": 3.006849315068493, "frac_reward_zero_std": 0.0, "grad_norm": 1.5805393398714536, "kl": 0.3963453769683838, "learning_rate": 4.2499999999999995e-07, "loss": 0.0091, "num_tokens": 7226421.0, "reward": 0.48175251483917236, "reward_std": 0.11739123612642288, "rewards/check_gptzero_func/mean": 0.48175248503685, "rewards/check_gptzero_func/std": 0.28507816791534424, "sampling/importance_sampling_ratio/max": 1.5820649862289429, "sampling/importance_sampling_ratio/mean": 1.0002192258834839, "sampling/importance_sampling_ratio/min": 0.6168044805526733, "sampling/sampling_logp_difference/max": 0.4832031726837158, "sampling/sampling_logp_difference/mean": 0.01920868270099163, "step": 439 }, { "clip_ratio/high_max": 0.013975726440548897, "clip_ratio/high_mean": 0.011202550493180752, "clip_ratio/low_mean": 0.008269512094557285, "clip_ratio/low_min": 0.00424628471955657, "clip_ratio/region_mean": 0.019472062587738037, "entropy": 0.4883231222629547, "epoch": 3.0136986301369864, "grad_norm": 1.314538659590765, "kl": 0.39845484495162964, "learning_rate": 4.2482876712328766e-07, "loss": 0.0009, "step": 440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1120.0, "completions/max_terminated_length": 1120.0, "completions/mean_length": 263.26788330078125, "completions/mean_terminated_length": 263.26788330078125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.3901638090610504, "epoch": 3.0205479452054793, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.6532944607352171, "kl": 0.4993448853492737, "learning_rate": 4.246575342465753e-07, "loss": -0.0211, "num_tokens": 7246390.0, "reward": 0.6969364285469055, "reward_std": 0.03468836098909378, "rewards/check_gptzero_func/mean": 0.6969364285469055, "rewards/check_gptzero_func/std": 0.27666065096855164, "sampling/importance_sampling_ratio/max": 1.659895896911621, "sampling/importance_sampling_ratio/mean": 1.0003132820129395, "sampling/importance_sampling_ratio/min": 0.4955004155635834, "sampling/sampling_logp_difference/max": 0.7021870613098145, "sampling/sampling_logp_difference/mean": 0.01686578243970871, "step": 441 }, { "clip_ratio/high_max": 0.021469859406352043, "clip_ratio/high_mean": 0.012775843963027, "clip_ratio/low_mean": 0.009206017479300499, "clip_ratio/low_min": 0.004324324429035187, "clip_ratio/region_mean": 0.0219818614423275, "entropy": 0.39137002825737, "epoch": 3.0273972602739727, "grad_norm": 1.3740767621503938, "kl": 0.4926808178424835, "learning_rate": 4.24486301369863e-07, "loss": -0.0288, "step": 442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 386.6071472167969, "completions/mean_terminated_length": 386.6071472167969, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5646977424621582, "epoch": 3.0342465753424657, "frac_reward_zero_std": 0.0, "grad_norm": 1.5181850156501489, "kl": 0.39461609721183777, "learning_rate": 4.2431506849315066e-07, "loss": -0.0012, "num_tokens": 7272620.0, "reward": 0.5634180903434753, "reward_std": 0.07899170368909836, "rewards/check_gptzero_func/mean": 0.5634180903434753, "rewards/check_gptzero_func/std": 0.27745458483695984, "sampling/importance_sampling_ratio/max": 1.6158748865127563, "sampling/importance_sampling_ratio/mean": 0.9999467730522156, "sampling/importance_sampling_ratio/min": 0.3736531138420105, "sampling/sampling_logp_difference/max": 0.9844274520874023, "sampling/sampling_logp_difference/mean": 0.022050628438591957, "step": 443 }, { "clip_ratio/high_max": 0.014878892339766026, "clip_ratio/high_mean": 0.01066535897552967, "clip_ratio/low_mean": 0.008398559875786304, "clip_ratio/low_min": 0.0039491006173193455, "clip_ratio/region_mean": 0.01906391978263855, "entropy": 0.5651803612709045, "epoch": 3.041095890410959, "grad_norm": 1.40919161955724, "kl": 0.390573650598526, "learning_rate": 4.2414383561643837e-07, "loss": -0.0091, "step": 444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 949.0, "completions/max_terminated_length": 949.0, "completions/mean_length": 253.3035888671875, "completions/mean_terminated_length": 253.3035888671875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.3940066397190094, "epoch": 3.047945205479452, "frac_reward_zero_std": 0.0, "grad_norm": 1.9067974291925713, "kl": 0.5375463366508484, "learning_rate": 4.23972602739726e-07, "loss": -0.0119, "num_tokens": 7291669.0, "reward": 0.5633066892623901, "reward_std": 0.07398279756307602, "rewards/check_gptzero_func/mean": 0.5633066296577454, "rewards/check_gptzero_func/std": 0.326462984085083, "sampling/importance_sampling_ratio/max": 1.544296383857727, "sampling/importance_sampling_ratio/mean": 1.0000829696655273, "sampling/importance_sampling_ratio/min": 0.6482194662094116, "sampling/sampling_logp_difference/max": 0.4345684051513672, "sampling/sampling_logp_difference/mean": 0.01733921840786934, "step": 445 }, { "clip_ratio/high_max": 0.018786126747727394, "clip_ratio/high_mean": 0.014339006505906582, "clip_ratio/low_mean": 0.010625234805047512, "clip_ratio/low_min": 0.006782945711165667, "clip_ratio/region_mean": 0.024964241310954094, "entropy": 0.39526480436325073, "epoch": 3.0547945205479454, "grad_norm": 1.6221990384525262, "kl": 0.5294922590255737, "learning_rate": 4.2380136986301367e-07, "loss": -0.0201, "step": 446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 954.0, "completions/max_terminated_length": 954.0, "completions/mean_length": 272.0357360839844, "completions/mean_terminated_length": 272.0357360839844, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.4054853916168213, "epoch": 3.0616438356164384, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8039220213426677, "kl": 0.49979931116104126, "learning_rate": 4.2363013698630137e-07, "loss": 0.0088, "num_tokens": 7311605.0, "reward": 0.5465919971466064, "reward_std": 0.056033555418252945, "rewards/check_gptzero_func/mean": 0.5465919375419617, "rewards/check_gptzero_func/std": 0.3525618016719818, "sampling/importance_sampling_ratio/max": 1.5261188745498657, "sampling/importance_sampling_ratio/mean": 0.9996744990348816, "sampling/importance_sampling_ratio/min": 0.6475019454956055, "sampling/sampling_logp_difference/max": 0.4346334934234619, "sampling/sampling_logp_difference/mean": 0.018263081088662148, "step": 447 }, { "clip_ratio/high_max": 0.016430411487817764, "clip_ratio/high_mean": 0.011742055416107178, "clip_ratio/low_mean": 0.008590782061219215, "clip_ratio/low_min": 0.004672897048294544, "clip_ratio/region_mean": 0.020332839339971542, "entropy": 0.4065660834312439, "epoch": 3.0684931506849313, "grad_norm": 1.9518990260643558, "kl": 0.4378521740436554, "learning_rate": 4.23458904109589e-07, "loss": 0.0016, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 350.5714416503906, "completions/mean_terminated_length": 350.5714416503906, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.4785776734352112, "epoch": 3.0753424657534247, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4662762298690013, "kl": 0.4593437612056732, "learning_rate": 4.2328767123287667e-07, "loss": 0.0229, "num_tokens": 7335417.0, "reward": 0.45218390226364136, "reward_std": 0.0698409304022789, "rewards/check_gptzero_func/mean": 0.45218390226364136, "rewards/check_gptzero_func/std": 0.2747618854045868, "sampling/importance_sampling_ratio/max": 1.6573824882507324, "sampling/importance_sampling_ratio/mean": 1.0001870393753052, "sampling/importance_sampling_ratio/min": 0.43100324273109436, "sampling/sampling_logp_difference/max": 0.8416397571563721, "sampling/sampling_logp_difference/mean": 0.019495662301778793, "step": 449 }, { "clip_ratio/high_max": 0.016553480178117752, "clip_ratio/high_mean": 0.011577353812754154, "clip_ratio/low_mean": 0.008467757143080235, "clip_ratio/low_min": 0.005337399896234274, "clip_ratio/region_mean": 0.02004511095583439, "entropy": 0.4774186909198761, "epoch": 3.0821917808219177, "grad_norm": 1.3370386316063876, "kl": 0.4676463305950165, "learning_rate": 4.231164383561643e-07, "loss": 0.0154, "step": 450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1680.0, "completions/max_terminated_length": 1680.0, "completions/mean_length": 359.0535888671875, "completions/mean_terminated_length": 359.0535888671875, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "entropy": 0.4939560294151306, "epoch": 3.089041095890411, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.5086415454422724, "kl": 0.4886672794818878, "learning_rate": 4.229452054794521e-07, "loss": 0.033, "num_tokens": 7360680.0, "reward": 0.5117508769035339, "reward_std": 0.05007917806506157, "rewards/check_gptzero_func/mean": 0.5117508172988892, "rewards/check_gptzero_func/std": 0.24039827287197113, "sampling/importance_sampling_ratio/max": 1.6190663576126099, "sampling/importance_sampling_ratio/mean": 0.999580979347229, "sampling/importance_sampling_ratio/min": 0.5955086946487427, "sampling/sampling_logp_difference/max": 0.5183392763137817, "sampling/sampling_logp_difference/mean": 0.019588569179177284, "step": 451 }, { "clip_ratio/high_max": 0.018916595727205276, "clip_ratio/high_mean": 0.012203727848827839, "clip_ratio/low_mean": 0.007785576395690441, "clip_ratio/low_min": 0.0050580184906721115, "clip_ratio/region_mean": 0.01998930238187313, "entropy": 0.4965417683124542, "epoch": 3.095890410958904, "grad_norm": 1.4286325597417222, "kl": 0.46534571051597595, "learning_rate": 4.2277397260273973e-07, "loss": 0.0258, "step": 452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1069.0, "completions/max_terminated_length": 1069.0, "completions/mean_length": 361.9464416503906, "completions/mean_terminated_length": 361.9464416503906, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.4532834589481354, "epoch": 3.1027397260273974, "frac_reward_zero_std": 0.0, "grad_norm": 1.5663166792956864, "kl": 0.39788147807121277, "learning_rate": 4.226027397260274e-07, "loss": -0.005, "num_tokens": 7385735.0, "reward": 0.5948807597160339, "reward_std": 0.04255663976073265, "rewards/check_gptzero_func/mean": 0.5948807001113892, "rewards/check_gptzero_func/std": 0.19474056363105774, "sampling/importance_sampling_ratio/max": 1.5567331314086914, "sampling/importance_sampling_ratio/mean": 1.0000545978546143, "sampling/importance_sampling_ratio/min": 0.45371681451797485, "sampling/sampling_logp_difference/max": 0.7902820110321045, "sampling/sampling_logp_difference/mean": 0.019448943436145782, "step": 453 }, { "clip_ratio/high_max": 0.013420089147984982, "clip_ratio/high_mean": 0.010429293848574162, "clip_ratio/low_mean": 0.0070374649949371815, "clip_ratio/low_min": 0.005494505632668734, "clip_ratio/region_mean": 0.01746675744652748, "entropy": 0.4532834589481354, "epoch": 3.1095890410958904, "grad_norm": 1.2355847945968135, "kl": 0.3899776041507721, "learning_rate": 4.224315068493151e-07, "loss": -0.0124, "step": 454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 814.0, "completions/max_terminated_length": 814.0, "completions/mean_length": 251.94644165039062, "completions/mean_terminated_length": 251.94644165039062, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.43216511607170105, "epoch": 3.1164383561643834, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.996355549716844, "kl": 0.5442614555358887, "learning_rate": 4.2226027397260273e-07, "loss": 0.0207, "num_tokens": 7404696.0, "reward": 0.5012215375900269, "reward_std": 0.09337075054645538, "rewards/check_gptzero_func/mean": 0.5012214779853821, "rewards/check_gptzero_func/std": 0.2971692681312561, "sampling/importance_sampling_ratio/max": 1.8625214099884033, "sampling/importance_sampling_ratio/mean": 1.0005046129226685, "sampling/importance_sampling_ratio/min": 0.5260701775550842, "sampling/sampling_logp_difference/max": 0.6423206329345703, "sampling/sampling_logp_difference/mean": 0.019195016473531723, "step": 455 }, { "clip_ratio/high_max": 0.019772319123148918, "clip_ratio/high_mean": 0.015133482404053211, "clip_ratio/low_mean": 0.011051931418478489, "clip_ratio/low_min": 0.009109730832278728, "clip_ratio/region_mean": 0.02618541195988655, "entropy": 0.4374207854270935, "epoch": 3.1232876712328768, "grad_norm": 1.77687151860226, "kl": 0.5268467664718628, "learning_rate": 4.220890410958904e-07, "loss": 0.012, "step": 456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 944.0, "completions/max_terminated_length": 944.0, "completions/mean_length": 283.1964416503906, "completions/mean_terminated_length": 283.1964416503906, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.47156578302383423, "epoch": 3.1301369863013697, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7391205988329912, "kl": 0.5193468332290649, "learning_rate": 4.2191780821917803e-07, "loss": -0.0119, "num_tokens": 7425259.0, "reward": 0.5212597250938416, "reward_std": 0.05188823863863945, "rewards/check_gptzero_func/mean": 0.5212597250938416, "rewards/check_gptzero_func/std": 0.2665650546550751, "sampling/importance_sampling_ratio/max": 1.7586225271224976, "sampling/importance_sampling_ratio/mean": 0.9998173117637634, "sampling/importance_sampling_ratio/min": 0.56510329246521, "sampling/sampling_logp_difference/max": 0.570746660232544, "sampling/sampling_logp_difference/mean": 0.02045266143977642, "step": 457 }, { "clip_ratio/high_max": 0.013797634281218052, "clip_ratio/high_mean": 0.010983937419950962, "clip_ratio/low_mean": 0.010412714444100857, "clip_ratio/low_min": 0.005988024175167084, "clip_ratio/region_mean": 0.02139665186405182, "entropy": 0.4722537100315094, "epoch": 3.136986301369863, "grad_norm": 1.4915821091320112, "kl": 0.5235239267349243, "learning_rate": 4.2174657534246574e-07, "loss": -0.0207, "step": 458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1017.0, "completions/max_terminated_length": 1017.0, "completions/mean_length": 287.7321472167969, "completions/mean_terminated_length": 287.7321472167969, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.41183874011039734, "epoch": 3.143835616438356, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.759039053697737, "kl": 0.6080593466758728, "learning_rate": 4.215753424657534e-07, "loss": 0.007, "num_tokens": 7446310.0, "reward": 0.5244979858398438, "reward_std": 0.05625303462147713, "rewards/check_gptzero_func/mean": 0.5244979858398438, "rewards/check_gptzero_func/std": 0.2705256938934326, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0003173351287842, "sampling/importance_sampling_ratio/min": 0.5772016644477844, "sampling/sampling_logp_difference/max": 0.9262034893035889, "sampling/sampling_logp_difference/mean": 0.01801391690969467, "step": 459 }, { "clip_ratio/high_max": 0.019976498559117317, "clip_ratio/high_mean": 0.015140034258365631, "clip_ratio/low_mean": 0.010713356547057629, "clip_ratio/low_min": 0.007019596174359322, "clip_ratio/region_mean": 0.025853391736745834, "entropy": 0.4156864583492279, "epoch": 3.1506849315068495, "grad_norm": 72.35867219934687, "kl": 1.526977777481079, "learning_rate": 4.214041095890411e-07, "loss": 0.0072, "step": 460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 353.76788330078125, "completions/mean_terminated_length": 353.76788330078125, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "entropy": 0.43768659234046936, "epoch": 3.1575342465753424, "frac_reward_zero_std": 0.0, "grad_norm": 1.6213570210575439, "kl": 0.4849376082420349, "learning_rate": 4.212328767123288e-07, "loss": 0.0323, "num_tokens": 7471009.0, "reward": 0.5168156623840332, "reward_std": 0.08419404923915863, "rewards/check_gptzero_func/mean": 0.5168156623840332, "rewards/check_gptzero_func/std": 0.19784660637378693, "sampling/importance_sampling_ratio/max": 1.8408313989639282, "sampling/importance_sampling_ratio/mean": 1.0000646114349365, "sampling/importance_sampling_ratio/min": 0.48238492012023926, "sampling/sampling_logp_difference/max": 0.7290129661560059, "sampling/sampling_logp_difference/mean": 0.01983928307890892, "step": 461 }, { "clip_ratio/high_max": 0.014693534933030605, "clip_ratio/high_mean": 0.011160275898873806, "clip_ratio/low_mean": 0.00851698312908411, "clip_ratio/low_min": 0.004372018855065107, "clip_ratio/region_mean": 0.019677260890603065, "entropy": 0.43783077597618103, "epoch": 3.1643835616438354, "grad_norm": 1.281128346019311, "kl": 0.4838812053203583, "learning_rate": 4.2106164383561644e-07, "loss": 0.0244, "step": 462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 748.0, "completions/max_terminated_length": 748.0, "completions/mean_length": 238.83929443359375, "completions/mean_terminated_length": 238.83929443359375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.4170268476009369, "epoch": 3.171232876712329, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.868106596164837, "kl": 0.5192426443099976, "learning_rate": 4.208904109589041e-07, "loss": 0.0077, "num_tokens": 7489302.0, "reward": 0.45651447772979736, "reward_std": 0.05835353583097458, "rewards/check_gptzero_func/mean": 0.456514447927475, "rewards/check_gptzero_func/std": 0.3118880093097687, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9994297623634338, "sampling/importance_sampling_ratio/min": 0.48078179359436035, "sampling/sampling_logp_difference/max": 0.7323417663574219, "sampling/sampling_logp_difference/mean": 0.01869340054690838, "step": 463 }, { "clip_ratio/high_max": 0.022169437259435654, "clip_ratio/high_mean": 0.015432717278599739, "clip_ratio/low_mean": 0.011973983608186245, "clip_ratio/low_min": 0.006269592326134443, "clip_ratio/region_mean": 0.02740670181810856, "entropy": 0.41819116473197937, "epoch": 3.1780821917808217, "grad_norm": 1.7600629469127296, "kl": 0.5218718647956848, "learning_rate": 4.2071917808219174e-07, "loss": -0.0014, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 847.0, "completions/max_terminated_length": 847.0, "completions/mean_length": 258.51788330078125, "completions/mean_terminated_length": 258.51788330078125, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.4730234444141388, "epoch": 3.184931506849315, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.86533784461948, "kl": 0.5758450627326965, "learning_rate": 4.2054794520547945e-07, "loss": 0.0021, "num_tokens": 7508347.0, "reward": 0.4572948217391968, "reward_std": 0.09298644214868546, "rewards/check_gptzero_func/mean": 0.45729485154151917, "rewards/check_gptzero_func/std": 0.3145589530467987, "sampling/importance_sampling_ratio/max": 1.8937104940414429, "sampling/importance_sampling_ratio/mean": 0.9994855523109436, "sampling/importance_sampling_ratio/min": 0.6057316660881042, "sampling/sampling_logp_difference/max": 0.638538122177124, "sampling/sampling_logp_difference/mean": 0.018840385600924492, "step": 465 }, { "clip_ratio/high_max": 0.0223214291036129, "clip_ratio/high_mean": 0.014441410079598427, "clip_ratio/low_mean": 0.011135556735098362, "clip_ratio/low_min": 0.007267442066222429, "clip_ratio/region_mean": 0.025576965883374214, "entropy": 0.47189468145370483, "epoch": 3.191780821917808, "grad_norm": 1.613103419057761, "kl": 0.5734944343566895, "learning_rate": 4.203767123287671e-07, "loss": -0.0063, "step": 466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1063.0, "completions/max_terminated_length": 1063.0, "completions/mean_length": 346.0357360839844, "completions/mean_terminated_length": 346.0357360839844, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.49945884943008423, "epoch": 3.1986301369863015, "frac_reward_zero_std": 0.0, "grad_norm": 1.7291136623178989, "kl": 0.47700071334838867, "learning_rate": 4.2020547945205475e-07, "loss": 0.0109, "num_tokens": 7532687.0, "reward": 0.5833393931388855, "reward_std": 0.07719603925943375, "rewards/check_gptzero_func/mean": 0.5833393335342407, "rewards/check_gptzero_func/std": 0.2505154609680176, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.999921977519989, "sampling/importance_sampling_ratio/min": 0.4770994782447815, "sampling/sampling_logp_difference/max": 1.229647159576416, "sampling/sampling_logp_difference/mean": 0.020177116617560387, "step": 467 }, { "clip_ratio/high_max": 0.016927633434534073, "clip_ratio/high_mean": 0.011144527234137058, "clip_ratio/low_mean": 0.009892722591757774, "clip_ratio/low_min": 0.005365526303648949, "clip_ratio/region_mean": 0.021037248894572258, "entropy": 0.5002941489219666, "epoch": 3.2054794520547945, "grad_norm": 1.697793387937522, "kl": 0.45032405853271484, "learning_rate": 4.200342465753424e-07, "loss": 0.004, "step": 468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 962.0, "completions/max_terminated_length": 962.0, "completions/mean_length": 324.4107360839844, "completions/mean_terminated_length": 324.4107360839844, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.4852828085422516, "epoch": 3.212328767123288, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.646251000349545, "kl": 0.4846603572368622, "learning_rate": 4.1986301369863015e-07, "loss": 0.0269, "num_tokens": 7555824.0, "reward": 0.499722421169281, "reward_std": 0.08420738577842712, "rewards/check_gptzero_func/mean": 0.499722421169281, "rewards/check_gptzero_func/std": 0.21642525494098663, "sampling/importance_sampling_ratio/max": 1.6217584609985352, "sampling/importance_sampling_ratio/mean": 0.9999415278434753, "sampling/importance_sampling_ratio/min": 0.35038822889328003, "sampling/sampling_logp_difference/max": 1.0487134456634521, "sampling/sampling_logp_difference/mean": 0.020076412707567215, "step": 469 }, { "clip_ratio/high_max": 0.015663644298911095, "clip_ratio/high_mean": 0.012148608453571796, "clip_ratio/low_mean": 0.010546826757490635, "clip_ratio/low_min": 0.0066283694468438625, "clip_ratio/region_mean": 0.02269543521106243, "entropy": 0.4850010871887207, "epoch": 3.219178082191781, "grad_norm": 1.379430851814966, "kl": 0.48671191930770874, "learning_rate": 4.196917808219178e-07, "loss": 0.0183, "step": 470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 273.25, "completions/mean_terminated_length": 273.25, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.3929573595523834, "epoch": 3.2260273972602738, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.605127873465274, "kl": 0.4248778522014618, "learning_rate": 4.1952054794520546e-07, "loss": 0.0219, "num_tokens": 7576578.0, "reward": 0.6131423711776733, "reward_std": 0.07394718378782272, "rewards/check_gptzero_func/mean": 0.6131423711776733, "rewards/check_gptzero_func/std": 0.32346969842910767, "sampling/importance_sampling_ratio/max": 1.8784486055374146, "sampling/importance_sampling_ratio/mean": 1.0000710487365723, "sampling/importance_sampling_ratio/min": 0.5087717771530151, "sampling/sampling_logp_difference/max": 0.6757557392120361, "sampling/sampling_logp_difference/mean": 0.017050568014383316, "step": 471 }, { "clip_ratio/high_max": 0.018808776512742043, "clip_ratio/high_mean": 0.010998108424246311, "clip_ratio/low_mean": 0.009991303086280823, "clip_ratio/low_min": 0.006498194765299559, "clip_ratio/region_mean": 0.02098941244184971, "entropy": 0.39172109961509705, "epoch": 3.232876712328767, "grad_norm": 1.3240013866445315, "kl": 0.4316242039203644, "learning_rate": 4.1934931506849316e-07, "loss": 0.0139, "step": 472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1120.0, "completions/max_terminated_length": 1120.0, "completions/mean_length": 302.4821472167969, "completions/mean_terminated_length": 302.4821472167969, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.43242937326431274, "epoch": 3.23972602739726, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8166417697822614, "kl": 0.5650509595870972, "learning_rate": 4.191780821917808e-07, "loss": 0.0222, "num_tokens": 7598289.0, "reward": 0.5533778667449951, "reward_std": 0.09285029023885727, "rewards/check_gptzero_func/mean": 0.5533778071403503, "rewards/check_gptzero_func/std": 0.30259469151496887, "sampling/importance_sampling_ratio/max": 1.7847235202789307, "sampling/importance_sampling_ratio/mean": 1.001013994216919, "sampling/importance_sampling_ratio/min": 0.5597032904624939, "sampling/sampling_logp_difference/max": 0.5803484916687012, "sampling/sampling_logp_difference/mean": 0.02006557397544384, "step": 473 }, { "clip_ratio/high_max": 0.016289593651890755, "clip_ratio/high_mean": 0.012193441390991211, "clip_ratio/low_mean": 0.01004562247544527, "clip_ratio/low_min": 0.0062949638813734055, "clip_ratio/region_mean": 0.022239062935113907, "entropy": 0.4327719807624817, "epoch": 3.2465753424657535, "grad_norm": 1.5651297035646001, "kl": 0.5111077427864075, "learning_rate": 4.1900684931506846e-07, "loss": 0.0141, "step": 474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1102.0, "completions/max_terminated_length": 1102.0, "completions/mean_length": 346.5535888671875, "completions/mean_terminated_length": 346.5535888671875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.4763779938220978, "epoch": 3.2534246575342465, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.601598755665416, "kl": 0.49680259823799133, "learning_rate": 4.188356164383561e-07, "loss": -0.0217, "num_tokens": 7622610.0, "reward": 0.4091305136680603, "reward_std": 0.0892767384648323, "rewards/check_gptzero_func/mean": 0.4091304838657379, "rewards/check_gptzero_func/std": 0.2698145806789398, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0001094341278076, "sampling/importance_sampling_ratio/min": 0.5483726263046265, "sampling/sampling_logp_difference/max": 0.9863030910491943, "sampling/sampling_logp_difference/mean": 0.020067377015948296, "step": 475 }, { "clip_ratio/high_max": 0.023433979600667953, "clip_ratio/high_mean": 0.01608988270163536, "clip_ratio/low_mean": 0.012888031080365181, "clip_ratio/low_min": 0.005652272142469883, "clip_ratio/region_mean": 0.02897791378200054, "entropy": 0.48002806305885315, "epoch": 3.26027397260274, "grad_norm": 5.105796302959958, "kl": 0.5888614654541016, "learning_rate": 4.186643835616438e-07, "loss": -0.0275, "step": 476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1087.0, "completions/max_terminated_length": 1087.0, "completions/mean_length": 322.5, "completions/mean_terminated_length": 322.5, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.4696597158908844, "epoch": 3.267123287671233, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5530279603633252, "kl": 0.529487669467926, "learning_rate": 4.1849315068493146e-07, "loss": 0.0084, "num_tokens": 7645784.0, "reward": 0.5473055839538574, "reward_std": 0.09197264164686203, "rewards/check_gptzero_func/mean": 0.5473055839538574, "rewards/check_gptzero_func/std": 0.29274776577949524, "sampling/importance_sampling_ratio/max": 1.9833735227584839, "sampling/importance_sampling_ratio/mean": 0.9997731447219849, "sampling/importance_sampling_ratio/min": 0.42985978722572327, "sampling/sampling_logp_difference/max": 0.8442962169647217, "sampling/sampling_logp_difference/mean": 0.019458813592791557, "step": 477 }, { "clip_ratio/high_max": 0.019709544256329536, "clip_ratio/high_mean": 0.014229093678295612, "clip_ratio/low_mean": 0.008577673695981503, "clip_ratio/low_min": 0.00560828298330307, "clip_ratio/region_mean": 0.022806767374277115, "entropy": 0.4713764190673828, "epoch": 3.2739726027397262, "grad_norm": 1.3130267328949188, "kl": 0.524124264717102, "learning_rate": 4.1832191780821917e-07, "loss": -0.0005, "step": 478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 885.0, "completions/max_terminated_length": 885.0, "completions/mean_length": 308.25, "completions/mean_terminated_length": 308.25, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.49177488684654236, "epoch": 3.280821917808219, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.695526997985897, "kl": 0.4269264042377472, "learning_rate": 4.1815068493150687e-07, "loss": 0.007, "num_tokens": 7667258.0, "reward": 0.5162050724029541, "reward_std": 0.08825661987066269, "rewards/check_gptzero_func/mean": 0.5162050724029541, "rewards/check_gptzero_func/std": 0.26859399676322937, "sampling/importance_sampling_ratio/max": 1.6146615743637085, "sampling/importance_sampling_ratio/mean": 0.9998269081115723, "sampling/importance_sampling_ratio/min": 0.6119703650474548, "sampling/sampling_logp_difference/max": 0.4910714626312256, "sampling/sampling_logp_difference/mean": 0.01970621570944786, "step": 479 }, { "clip_ratio/high_max": 0.020714284852147102, "clip_ratio/high_mean": 0.01495873462408781, "clip_ratio/low_mean": 0.009859488345682621, "clip_ratio/low_min": 0.005323868710547686, "clip_ratio/region_mean": 0.02481822483241558, "entropy": 0.4952567517757416, "epoch": 3.287671232876712, "grad_norm": 1.704586992913013, "kl": 0.4181745946407318, "learning_rate": 4.179794520547945e-07, "loss": -0.0009, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1663.0, "completions/max_terminated_length": 1663.0, "completions/mean_length": 326.01788330078125, "completions/mean_terminated_length": 326.01788330078125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.3981316387653351, "epoch": 3.2945205479452055, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.594986363269119, "kl": 0.5316309332847595, "learning_rate": 4.1780821917808217e-07, "loss": -0.0065, "num_tokens": 7690623.0, "reward": 0.6675976514816284, "reward_std": 0.0733807384967804, "rewards/check_gptzero_func/mean": 0.6675976514816284, "rewards/check_gptzero_func/std": 0.2729575037956238, "sampling/importance_sampling_ratio/max": 1.561289668083191, "sampling/importance_sampling_ratio/mean": 1.0007331371307373, "sampling/importance_sampling_ratio/min": 0.5106136798858643, "sampling/sampling_logp_difference/max": 0.6721420288085938, "sampling/sampling_logp_difference/mean": 0.017880862578749657, "step": 481 }, { "clip_ratio/high_max": 0.016296295449137688, "clip_ratio/high_mean": 0.01369976531714201, "clip_ratio/low_mean": 0.00983111560344696, "clip_ratio/low_min": 0.006306007504463196, "clip_ratio/region_mean": 0.023530879989266396, "entropy": 0.3992086350917816, "epoch": 3.3013698630136985, "grad_norm": 1.266532007064286, "kl": 0.5200954675674438, "learning_rate": 4.176369863013698e-07, "loss": -0.0148, "step": 482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 972.0, "completions/max_terminated_length": 972.0, "completions/mean_length": 287.2321472167969, "completions/mean_terminated_length": 287.2321472167969, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.41490936279296875, "epoch": 3.308219178082192, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.574893950030457, "kl": 0.4856852889060974, "learning_rate": 4.174657534246575e-07, "loss": 0.0162, "num_tokens": 7711536.0, "reward": 0.4675489664077759, "reward_std": 0.08999417722225189, "rewards/check_gptzero_func/mean": 0.4675489366054535, "rewards/check_gptzero_func/std": 0.28230687975883484, "sampling/importance_sampling_ratio/max": 1.8604437112808228, "sampling/importance_sampling_ratio/mean": 0.9995095133781433, "sampling/importance_sampling_ratio/min": 0.5486602187156677, "sampling/sampling_logp_difference/max": 0.6208150386810303, "sampling/sampling_logp_difference/mean": 0.0185556560754776, "step": 483 }, { "clip_ratio/high_max": 0.01572481542825699, "clip_ratio/high_mean": 0.011793887242674828, "clip_ratio/low_mean": 0.010256694629788399, "clip_ratio/low_min": 0.007253384683281183, "clip_ratio/region_mean": 0.022050580009818077, "entropy": 0.41335412859916687, "epoch": 3.315068493150685, "grad_norm": 1.335672555985743, "kl": 0.48998838663101196, "learning_rate": 4.172945205479452e-07, "loss": 0.0078, "step": 484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 980.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 181.33929443359375, "completions/mean_terminated_length": 181.33929443359375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.2743310034275055, "epoch": 3.3219178082191783, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.114041194248886, "kl": 0.6251950860023499, "learning_rate": 4.171232876712328e-07, "loss": 0.0288, "num_tokens": 7726537.0, "reward": 0.6185058355331421, "reward_std": 0.09191993623971939, "rewards/check_gptzero_func/mean": 0.6185058355331421, "rewards/check_gptzero_func/std": 0.3660455048084259, "sampling/importance_sampling_ratio/max": 1.7985888719558716, "sampling/importance_sampling_ratio/mean": 1.000146746635437, "sampling/importance_sampling_ratio/min": 0.5145557522773743, "sampling/sampling_logp_difference/max": 0.6644513607025146, "sampling/sampling_logp_difference/mean": 0.014857699163258076, "step": 485 }, { "clip_ratio/high_max": 0.01949152536690235, "clip_ratio/high_mean": 0.013406793586909771, "clip_ratio/low_mean": 0.013813285157084465, "clip_ratio/low_min": 0.010169491171836853, "clip_ratio/region_mean": 0.02722007967531681, "entropy": 0.271262526512146, "epoch": 3.328767123287671, "grad_norm": 1.8853922913103707, "kl": 0.6448383927345276, "learning_rate": 4.1695205479452053e-07, "loss": 0.0208, "step": 486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1026.0, "completions/max_terminated_length": 1026.0, "completions/mean_length": 249.4107208251953, "completions/mean_terminated_length": 249.4107208251953, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.3734149932861328, "epoch": 3.3356164383561646, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.7755449338394778, "kl": 0.6755719184875488, "learning_rate": 4.1678082191780823e-07, "loss": -0.0248, "num_tokens": 7745610.0, "reward": 0.5827426314353943, "reward_std": 0.0786251425743103, "rewards/check_gptzero_func/mean": 0.5827426314353943, "rewards/check_gptzero_func/std": 0.2627158463001251, "sampling/importance_sampling_ratio/max": 1.6976984739303589, "sampling/importance_sampling_ratio/mean": 1.0001782178878784, "sampling/importance_sampling_ratio/min": 0.5057286024093628, "sampling/sampling_logp_difference/max": 0.6817550659179688, "sampling/sampling_logp_difference/mean": 0.01769782416522503, "step": 487 }, { "clip_ratio/high_max": 0.019846349954605103, "clip_ratio/high_mean": 0.015140801668167114, "clip_ratio/low_mean": 0.012552534230053425, "clip_ratio/low_min": 0.011523687280714512, "clip_ratio/region_mean": 0.027693334966897964, "entropy": 0.37290021777153015, "epoch": 3.3424657534246576, "grad_norm": 1.5396159122557147, "kl": 0.6488858461380005, "learning_rate": 4.166095890410959e-07, "loss": -0.033, "step": 488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 314.75, "completions/mean_terminated_length": 314.75, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "entropy": 0.4115291237831116, "epoch": 3.3493150684931505, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7003389155132547, "kl": 0.4769664406776428, "learning_rate": 4.1643835616438353e-07, "loss": 0.007, "num_tokens": 7768590.0, "reward": 0.5851594805717468, "reward_std": 0.10722137242555618, "rewards/check_gptzero_func/mean": 0.585159420967102, "rewards/check_gptzero_func/std": 0.29561910033226013, "sampling/importance_sampling_ratio/max": 1.6399849653244019, "sampling/importance_sampling_ratio/mean": 1.0004245042800903, "sampling/importance_sampling_ratio/min": 0.5315423607826233, "sampling/sampling_logp_difference/max": 0.6319724321365356, "sampling/sampling_logp_difference/mean": 0.017643559724092484, "step": 489 }, { "clip_ratio/high_max": 0.014347590506076813, "clip_ratio/high_mean": 0.011666715145111084, "clip_ratio/low_mean": 0.01001689676195383, "clip_ratio/low_min": 0.006899950560182333, "clip_ratio/region_mean": 0.02168361283838749, "entropy": 0.41460636258125305, "epoch": 3.356164383561644, "grad_norm": 1.534925099185854, "kl": 0.4402444660663605, "learning_rate": 4.1626712328767124e-07, "loss": -0.001, "step": 490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 955.0, "completions/max_terminated_length": 955.0, "completions/mean_length": 187.87501525878906, "completions/mean_terminated_length": 187.87501525878906, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.3476373851299286, "epoch": 3.363013698630137, "frac_reward_zero_std": 0.0, "grad_norm": 2.863959047412713, "kl": 0.7344058156013489, "learning_rate": 4.160958904109589e-07, "loss": 0.0475, "num_tokens": 7784035.0, "reward": 0.5542150735855103, "reward_std": 0.08411113172769547, "rewards/check_gptzero_func/mean": 0.5542150139808655, "rewards/check_gptzero_func/std": 0.2961411476135254, "sampling/importance_sampling_ratio/max": 1.9638853073120117, "sampling/importance_sampling_ratio/mean": 0.9997879862785339, "sampling/importance_sampling_ratio/min": 0.5730352997779846, "sampling/sampling_logp_difference/max": 0.6749248504638672, "sampling/sampling_logp_difference/mean": 0.01808553747832775, "step": 491 }, { "clip_ratio/high_max": 0.016688061878085136, "clip_ratio/high_mean": 0.0145888552069664, "clip_ratio/low_mean": 0.01673806831240654, "clip_ratio/low_min": 0.010904425755143166, "clip_ratio/region_mean": 0.03132692351937294, "entropy": 0.34653574228286743, "epoch": 3.3698630136986303, "grad_norm": 7.98450533180743, "kl": 0.7228254079818726, "learning_rate": 4.1592465753424654e-07, "loss": 0.0511, "step": 492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1193.0, "completions/max_terminated_length": 1193.0, "completions/mean_length": 261.8035888671875, "completions/mean_terminated_length": 261.8035888671875, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.4067089259624481, "epoch": 3.3767123287671232, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.8361863164308048, "kl": 0.6780094504356384, "learning_rate": 4.157534246575342e-07, "loss": -0.0159, "num_tokens": 7804010.0, "reward": 0.5251018404960632, "reward_std": 0.0804722011089325, "rewards/check_gptzero_func/mean": 0.5251017808914185, "rewards/check_gptzero_func/std": 0.27357470989227295, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9999498128890991, "sampling/importance_sampling_ratio/min": 0.5193390250205994, "sampling/sampling_logp_difference/max": 0.7382302284240723, "sampling/sampling_logp_difference/mean": 0.018940484151244164, "step": 493 }, { "clip_ratio/high_max": 0.015996122732758522, "clip_ratio/high_mean": 0.012200291268527508, "clip_ratio/low_mean": 0.008361431770026684, "clip_ratio/low_min": 0.006016847211867571, "clip_ratio/region_mean": 0.02056172303855419, "entropy": 0.4098653197288513, "epoch": 3.383561643835616, "grad_norm": 1.7944326189410187, "kl": 0.5774158239364624, "learning_rate": 4.155821917808219e-07, "loss": -0.0228, "step": 494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1101.0, "completions/max_terminated_length": 1101.0, "completions/mean_length": 292.5714416503906, "completions/mean_terminated_length": 292.5714416503906, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.4717128872871399, "epoch": 3.3904109589041096, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.6998074952286573, "kl": 0.49909380078315735, "learning_rate": 4.1541095890410954e-07, "loss": -0.0044, "num_tokens": 7824826.0, "reward": 0.5954373478889465, "reward_std": 0.09060129523277283, "rewards/check_gptzero_func/mean": 0.5954373478889465, "rewards/check_gptzero_func/std": 0.27933767437934875, "sampling/importance_sampling_ratio/max": 1.627264380455017, "sampling/importance_sampling_ratio/mean": 0.9994211196899414, "sampling/importance_sampling_ratio/min": 0.5680952668190002, "sampling/sampling_logp_difference/max": 0.5654661655426025, "sampling/sampling_logp_difference/mean": 0.019440079107880592, "step": 495 }, { "clip_ratio/high_max": 0.02238805964589119, "clip_ratio/high_mean": 0.014316508546471596, "clip_ratio/low_mean": 0.009919802658259869, "clip_ratio/low_min": 0.006241640541702509, "clip_ratio/region_mean": 0.02423631027340889, "entropy": 0.4736582934856415, "epoch": 3.3972602739726026, "grad_norm": 1.6665450215674709, "kl": 0.4914906322956085, "learning_rate": 4.1523972602739724e-07, "loss": -0.0133, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1133.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 337.4285888671875, "completions/mean_terminated_length": 337.4285888671875, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.4751755893230438, "epoch": 3.404109589041096, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.6627389854914734, "kl": 0.5064553618431091, "learning_rate": 4.1506849315068495e-07, "loss": -0.0002, "num_tokens": 7848578.0, "reward": 0.5365485548973083, "reward_std": 0.07966728508472443, "rewards/check_gptzero_func/mean": 0.5365485548973083, "rewards/check_gptzero_func/std": 0.2571119964122772, "sampling/importance_sampling_ratio/max": 1.7884069681167603, "sampling/importance_sampling_ratio/mean": 0.9997381567955017, "sampling/importance_sampling_ratio/min": 0.384842187166214, "sampling/sampling_logp_difference/max": 0.9549219608306885, "sampling/sampling_logp_difference/mean": 0.020356029272079468, "step": 497 }, { "clip_ratio/high_max": 0.014658726751804352, "clip_ratio/high_mean": 0.01216293964534998, "clip_ratio/low_mean": 0.010491901077330112, "clip_ratio/low_min": 0.005311077460646629, "clip_ratio/region_mean": 0.022654840722680092, "entropy": 0.47322770953178406, "epoch": 3.410958904109589, "grad_norm": 5.691183532660796, "kl": 0.6041978597640991, "learning_rate": 4.148972602739726e-07, "loss": -0.0079, "step": 498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/max_terminated_length": 470.0, "completions/mean_length": 190.5357208251953, "completions/mean_terminated_length": 190.5357208251953, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.3298071026802063, "epoch": 3.4178082191780823, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.0535714118123947, "kl": 0.673547089099884, "learning_rate": 4.1472602739726025e-07, "loss": -0.0076, "num_tokens": 7864464.0, "reward": 0.5816425085067749, "reward_std": 0.03745466470718384, "rewards/check_gptzero_func/mean": 0.5816425681114197, "rewards/check_gptzero_func/std": 0.3060147166252136, "sampling/importance_sampling_ratio/max": 1.61841881275177, "sampling/importance_sampling_ratio/mean": 1.0002716779708862, "sampling/importance_sampling_ratio/min": 0.5365948677062988, "sampling/sampling_logp_difference/max": 0.6225118637084961, "sampling/sampling_logp_difference/mean": 0.015817660838365555, "step": 499 }, { "clip_ratio/high_max": 0.0279720276594162, "clip_ratio/high_mean": 0.017558906227350235, "clip_ratio/low_mean": 0.014504906721413136, "clip_ratio/low_min": 0.008748318068683147, "clip_ratio/region_mean": 0.032063812017440796, "entropy": 0.3304455280303955, "epoch": 3.4246575342465753, "grad_norm": 1.5619526383670652, "kl": 0.6794264912605286, "learning_rate": 4.145547945205479e-07, "loss": -0.0173, "step": 500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1000.0, "completions/mean_length": 321.4107360839844, "completions/mean_terminated_length": 272.7090759277344, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.3805066645145416, "epoch": 3.4315068493150687, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7386862874211204, "kl": 0.7269631028175354, "learning_rate": 4.143835616438356e-07, "loss": 0.1058, "num_tokens": 7887097.0, "reward": 0.5268490314483643, "reward_std": 0.059716857969760895, "rewards/check_gptzero_func/mean": 0.5268489718437195, "rewards/check_gptzero_func/std": 0.2167024463415146, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.999845027923584, "sampling/importance_sampling_ratio/min": 0.5505179762840271, "sampling/sampling_logp_difference/max": 1.266570806503296, "sampling/sampling_logp_difference/mean": 0.017485156655311584, "step": 501 }, { "clip_ratio/high_max": 0.01849346049129963, "clip_ratio/high_mean": 0.012742585502564907, "clip_ratio/low_mean": 0.009582691825926304, "clip_ratio/low_min": 0.003267973894253373, "clip_ratio/region_mean": 0.02232527732849121, "entropy": 0.380248099565506, "epoch": 3.4383561643835616, "grad_norm": 1.267246836610699, "kl": 0.603516697883606, "learning_rate": 4.1421232876712325e-07, "loss": 0.0987, "step": 502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 705.0, "completions/max_terminated_length": 705.0, "completions/mean_length": 184.60714721679688, "completions/mean_terminated_length": 184.60714721679688, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.25342103838920593, "epoch": 3.4452054794520546, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.1673627789815066, "kl": 0.7664753794670105, "learning_rate": 4.140410958904109e-07, "loss": -0.0025, "num_tokens": 7902039.0, "reward": 0.592180073261261, "reward_std": 0.06853179633617401, "rewards/check_gptzero_func/mean": 0.5921799540519714, "rewards/check_gptzero_func/std": 0.2554176151752472, "sampling/importance_sampling_ratio/max": 1.7520533800125122, "sampling/importance_sampling_ratio/mean": 0.9994977116584778, "sampling/importance_sampling_ratio/min": 0.620280385017395, "sampling/sampling_logp_difference/max": 0.5607883930206299, "sampling/sampling_logp_difference/mean": 0.014740061946213245, "step": 503 }, { "clip_ratio/high_max": 0.032460734248161316, "clip_ratio/high_mean": 0.018417079001665115, "clip_ratio/low_mean": 0.013782172463834286, "clip_ratio/low_min": 0.004940711427479982, "clip_ratio/region_mean": 0.03219924867153168, "entropy": 0.25281718373298645, "epoch": 3.452054794520548, "grad_norm": 1.7694214459501403, "kl": 0.7739506363868713, "learning_rate": 4.1386986301369866e-07, "loss": -0.012, "step": 504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 916.0, "completions/max_terminated_length": 916.0, "completions/mean_length": 272.0, "completions/mean_terminated_length": 272.0, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.46329689025878906, "epoch": 3.458904109589041, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.6387758132346049, "kl": 0.48296356201171875, "learning_rate": 4.136986301369863e-07, "loss": -0.0106, "num_tokens": 7921643.0, "reward": 0.6186950206756592, "reward_std": 0.06027238443493843, "rewards/check_gptzero_func/mean": 0.6186949610710144, "rewards/check_gptzero_func/std": 0.2903038263320923, "sampling/importance_sampling_ratio/max": 1.6511523723602295, "sampling/importance_sampling_ratio/mean": 0.999451220035553, "sampling/importance_sampling_ratio/min": 0.39969074726104736, "sampling/sampling_logp_difference/max": 0.9170641899108887, "sampling/sampling_logp_difference/mean": 0.019514381885528564, "step": 505 }, { "clip_ratio/high_max": 0.01997578702867031, "clip_ratio/high_mean": 0.014118080027401447, "clip_ratio/low_mean": 0.010758536867797375, "clip_ratio/low_min": 0.006386175751686096, "clip_ratio/region_mean": 0.02487661875784397, "entropy": 0.46532610058784485, "epoch": 3.4657534246575343, "grad_norm": 1.4925228619694457, "kl": 0.47649556398391724, "learning_rate": 4.1352739726027396e-07, "loss": -0.0188, "step": 506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 994.0, "completions/max_terminated_length": 994.0, "completions/mean_length": 259.39288330078125, "completions/mean_terminated_length": 259.39288330078125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.3948753774166107, "epoch": 3.4726027397260273, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7595112201101022, "kl": 0.6864471435546875, "learning_rate": 4.133561643835616e-07, "loss": -0.0028, "num_tokens": 7941299.0, "reward": 0.5225849151611328, "reward_std": 0.07563921809196472, "rewards/check_gptzero_func/mean": 0.5225849151611328, "rewards/check_gptzero_func/std": 0.26429808139801025, "sampling/importance_sampling_ratio/max": 1.9441807270050049, "sampling/importance_sampling_ratio/mean": 0.9997574687004089, "sampling/importance_sampling_ratio/min": 0.5516567230224609, "sampling/sampling_logp_difference/max": 0.6648406982421875, "sampling/sampling_logp_difference/mean": 0.018412522971630096, "step": 507 }, { "clip_ratio/high_max": 0.020576132461428642, "clip_ratio/high_mean": 0.015805507078766823, "clip_ratio/low_mean": 0.01140408031642437, "clip_ratio/low_min": 0.007014809176325798, "clip_ratio/region_mean": 0.027209585532546043, "entropy": 0.39518049359321594, "epoch": 3.4794520547945207, "grad_norm": 1.597995459567177, "kl": 0.6762586236000061, "learning_rate": 4.131849315068493e-07, "loss": -0.0117, "step": 508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 241.46429443359375, "completions/mean_terminated_length": 241.46429443359375, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "entropy": 0.3290470540523529, "epoch": 3.4863013698630136, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9261806491619051, "kl": 0.6026144623756409, "learning_rate": 4.1301369863013696e-07, "loss": -0.0099, "num_tokens": 7960449.0, "reward": 0.5151383280754089, "reward_std": 0.08390641957521439, "rewards/check_gptzero_func/mean": 0.5151382684707642, "rewards/check_gptzero_func/std": 0.28103935718536377, "sampling/importance_sampling_ratio/max": 1.6272636651992798, "sampling/importance_sampling_ratio/mean": 1.0008094310760498, "sampling/importance_sampling_ratio/min": 0.6387778520584106, "sampling/sampling_logp_difference/max": 0.48689985275268555, "sampling/sampling_logp_difference/mean": 0.01520937867462635, "step": 509 }, { "clip_ratio/high_max": 0.01591956429183483, "clip_ratio/high_mean": 0.013258509337902069, "clip_ratio/low_mean": 0.01083920244127512, "clip_ratio/low_min": 0.0071428571827709675, "clip_ratio/region_mean": 0.024097710847854614, "entropy": 0.3293898403644562, "epoch": 3.493150684931507, "grad_norm": 23643.71178835476, "kl": 1114.735107421875, "learning_rate": 4.128424657534246e-07, "loss": 3.9578, "step": 510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 925.0, "completions/max_terminated_length": 925.0, "completions/mean_length": 260.9821472167969, "completions/mean_terminated_length": 260.9821472167969, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.41454529762268066, "epoch": 3.5, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8841169318675384, "kl": 0.6244708299636841, "learning_rate": 4.126712328767123e-07, "loss": -0.0057, "num_tokens": 7980258.0, "reward": 0.6075229048728943, "reward_std": 0.04297446832060814, "rewards/check_gptzero_func/mean": 0.6075229644775391, "rewards/check_gptzero_func/std": 0.28353285789489746, "sampling/importance_sampling_ratio/max": 1.6298394203186035, "sampling/importance_sampling_ratio/mean": 0.9994879364967346, "sampling/importance_sampling_ratio/min": 0.6073929667472839, "sampling/sampling_logp_difference/max": 0.4985792636871338, "sampling/sampling_logp_difference/mean": 0.017810553312301636, "step": 511 }, { "clip_ratio/high_max": 0.016267942264676094, "clip_ratio/high_mean": 0.011614644899964333, "clip_ratio/low_mean": 0.011317742988467216, "clip_ratio/low_min": 0.007205764763057232, "clip_ratio/region_mean": 0.0229323897510767, "entropy": 0.4131337106227875, "epoch": 3.506849315068493, "grad_norm": 1.5159560767505844, "kl": 0.5935631394386292, "learning_rate": 4.1249999999999997e-07, "loss": -0.0149, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 899.0, "completions/max_terminated_length": 899.0, "completions/mean_length": 194.96429443359375, "completions/mean_terminated_length": 194.96429443359375, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.34711578488349915, "epoch": 3.5136986301369864, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.9654561062323785, "kl": 0.6148673295974731, "learning_rate": 4.1232876712328767e-07, "loss": -0.0018, "num_tokens": 7995654.0, "reward": 0.5841370820999146, "reward_std": 0.04108033329248428, "rewards/check_gptzero_func/mean": 0.5841370224952698, "rewards/check_gptzero_func/std": 0.2692674994468689, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9999628067016602, "sampling/importance_sampling_ratio/min": 0.3413031995296478, "sampling/sampling_logp_difference/max": 1.074984073638916, "sampling/sampling_logp_difference/mean": 0.016575396060943604, "step": 513 }, { "clip_ratio/high_max": 0.020486555993556976, "clip_ratio/high_mean": 0.013894313015043736, "clip_ratio/low_mean": 0.01494431309401989, "clip_ratio/low_min": 0.010489510372281075, "clip_ratio/region_mean": 0.02883862890303135, "entropy": 0.3438382148742676, "epoch": 3.5205479452054793, "grad_norm": 2.121766757747225, "kl": 0.6414299607276917, "learning_rate": 4.121575342465753e-07, "loss": -0.0101, "step": 514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1002.0, "completions/max_terminated_length": 1002.0, "completions/mean_length": 309.9464416503906, "completions/mean_terminated_length": 309.9464416503906, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.4968717396259308, "epoch": 3.5273972602739727, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7643323370648378, "kl": 0.5674458742141724, "learning_rate": 4.11986301369863e-07, "loss": -0.004, "num_tokens": 8017313.0, "reward": 0.5069456100463867, "reward_std": 0.11505816131830215, "rewards/check_gptzero_func/mean": 0.5069456100463867, "rewards/check_gptzero_func/std": 0.2840609848499298, "sampling/importance_sampling_ratio/max": 1.7360812425613403, "sampling/importance_sampling_ratio/mean": 0.9998739957809448, "sampling/importance_sampling_ratio/min": 0.22986865043640137, "sampling/sampling_logp_difference/max": 1.4702472686767578, "sampling/sampling_logp_difference/mean": 0.020731190219521523, "step": 515 }, { "clip_ratio/high_max": 0.022954680025577545, "clip_ratio/high_mean": 0.014615772292017937, "clip_ratio/low_mean": 0.01082452479749918, "clip_ratio/low_min": 0.007344998884946108, "clip_ratio/region_mean": 0.025440296158194542, "entropy": 0.4996481239795685, "epoch": 3.5342465753424657, "grad_norm": 2.030751808760827, "kl": 0.5564743280410767, "learning_rate": 4.118150684931507e-07, "loss": -0.0126, "step": 516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1150.0, "completions/max_terminated_length": 1150.0, "completions/mean_length": 314.8571472167969, "completions/mean_terminated_length": 314.8571472167969, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.40225696563720703, "epoch": 3.541095890410959, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.5165625331498007, "kl": 0.49749523401260376, "learning_rate": 4.116438356164383e-07, "loss": 0.0197, "num_tokens": 8040231.0, "reward": 0.538957953453064, "reward_std": 0.13057827949523926, "rewards/check_gptzero_func/mean": 0.5389578938484192, "rewards/check_gptzero_func/std": 0.2849627733230591, "sampling/importance_sampling_ratio/max": 1.632657527923584, "sampling/importance_sampling_ratio/mean": 1.0005923509597778, "sampling/importance_sampling_ratio/min": 0.4813205301761627, "sampling/sampling_logp_difference/max": 0.7312219142913818, "sampling/sampling_logp_difference/mean": 0.017453664913773537, "step": 517 }, { "clip_ratio/high_max": 0.014555077999830246, "clip_ratio/high_mean": 0.01144361961632967, "clip_ratio/low_mean": 0.010354082100093365, "clip_ratio/low_min": 0.005280528217554092, "clip_ratio/region_mean": 0.021797701716423035, "entropy": 0.4019301235675812, "epoch": 3.547945205479452, "grad_norm": 1.277544753253443, "kl": 0.5269235372543335, "learning_rate": 4.1147260273972603e-07, "loss": 0.0118, "step": 518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 290.3571472167969, "completions/mean_terminated_length": 290.3571472167969, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.44263675808906555, "epoch": 3.5547945205479454, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.153099902422354, "kl": 0.5977301001548767, "learning_rate": 4.113013698630137e-07, "loss": -0.0103, "num_tokens": 8061671.0, "reward": 0.5362567901611328, "reward_std": 0.13983400166034698, "rewards/check_gptzero_func/mean": 0.5362567901611328, "rewards/check_gptzero_func/std": 0.30682802200317383, "sampling/importance_sampling_ratio/max": 1.5792690515518188, "sampling/importance_sampling_ratio/mean": 0.9997534155845642, "sampling/importance_sampling_ratio/min": 0.3676503002643585, "sampling/sampling_logp_difference/max": 1.000623106956482, "sampling/sampling_logp_difference/mean": 0.018685046583414078, "step": 519 }, { "clip_ratio/high_max": 0.019491245970129967, "clip_ratio/high_mean": 0.013813391327857971, "clip_ratio/low_mean": 0.008168717846274376, "clip_ratio/low_min": 0.003872633446007967, "clip_ratio/region_mean": 0.021982107311487198, "entropy": 0.44457578659057617, "epoch": 3.5616438356164384, "grad_norm": 6.230933780033433, "kl": 0.5489618182182312, "learning_rate": 4.1113013698630133e-07, "loss": -0.0148, "step": 520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/max_terminated_length": 405.0, "completions/mean_length": 197.9107208251953, "completions/mean_terminated_length": 197.9107208251953, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "entropy": 0.32892370223999023, "epoch": 3.5684931506849313, "frac_reward_zero_std": 0.0, "grad_norm": 2.030222582416303, "kl": 0.8415141701698303, "learning_rate": 4.10958904109589e-07, "loss": 0.0032, "num_tokens": 8078034.0, "reward": 0.5535396337509155, "reward_std": 0.042353156954050064, "rewards/check_gptzero_func/mean": 0.5535395741462708, "rewards/check_gptzero_func/std": 0.22505973279476166, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0003951787948608, "sampling/importance_sampling_ratio/min": 0.3423502445220947, "sampling/sampling_logp_difference/max": 1.0719208717346191, "sampling/sampling_logp_difference/mean": 0.018017105758190155, "step": 521 }, { "clip_ratio/high_max": 0.022176379337906837, "clip_ratio/high_mean": 0.017972823232412338, "clip_ratio/low_mean": 0.0147507693618536, "clip_ratio/low_min": 0.008866370655596256, "clip_ratio/region_mean": 0.03272358700633049, "entropy": 0.33190712332725525, "epoch": 3.5753424657534247, "grad_norm": 2.267918250386667, "kl": 0.7950897812843323, "learning_rate": 4.1078767123287674e-07, "loss": -0.0051, "step": 522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 951.0, "completions/max_terminated_length": 951.0, "completions/mean_length": 232.6785888671875, "completions/mean_terminated_length": 232.6785888671875, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.3727235794067383, "epoch": 3.5821917808219177, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.0207289064829888, "kl": 0.6727575659751892, "learning_rate": 4.106164383561644e-07, "loss": 0.002, "num_tokens": 8095708.0, "reward": 0.518436849117279, "reward_std": 0.0734190121293068, "rewards/check_gptzero_func/mean": 0.5184367895126343, "rewards/check_gptzero_func/std": 0.2719859778881073, "sampling/importance_sampling_ratio/max": 1.5184223651885986, "sampling/importance_sampling_ratio/mean": 1.0007203817367554, "sampling/importance_sampling_ratio/min": 0.569301187992096, "sampling/sampling_logp_difference/max": 0.5633456707000732, "sampling/sampling_logp_difference/mean": 0.01771610789000988, "step": 523 }, { "clip_ratio/high_max": 0.020190022885799408, "clip_ratio/high_mean": 0.015444931574165821, "clip_ratio/low_mean": 0.014132468029856682, "clip_ratio/low_min": 0.009798865765333176, "clip_ratio/region_mean": 0.029577400535345078, "entropy": 0.3745793402194977, "epoch": 3.589041095890411, "grad_norm": 1.6116590375689233, "kl": 0.654737651348114, "learning_rate": 4.1044520547945204e-07, "loss": -0.0064, "step": 524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1021.0, "completions/max_terminated_length": 1021.0, "completions/mean_length": 204.25001525878906, "completions/mean_terminated_length": 204.25001525878906, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.3441799283027649, "epoch": 3.595890410958904, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 2.012630863087215, "kl": 0.6832078695297241, "learning_rate": 4.1027397260273974e-07, "loss": -0.0141, "num_tokens": 8112560.0, "reward": 0.5731584429740906, "reward_std": 0.053310420364141464, "rewards/check_gptzero_func/mean": 0.5731583833694458, "rewards/check_gptzero_func/std": 0.29931163787841797, "sampling/importance_sampling_ratio/max": 1.547896385192871, "sampling/importance_sampling_ratio/mean": 1.0002151727676392, "sampling/importance_sampling_ratio/min": 0.4983289837837219, "sampling/sampling_logp_difference/max": 0.6964948177337646, "sampling/sampling_logp_difference/mean": 0.01567707769572735, "step": 525 }, { "clip_ratio/high_max": 0.02231237292289734, "clip_ratio/high_mean": 0.016250547021627426, "clip_ratio/low_mean": 0.013533170334994793, "clip_ratio/low_min": 0.008750437758862972, "clip_ratio/region_mean": 0.029783716425299644, "entropy": 0.3456241190433502, "epoch": 3.602739726027397, "grad_norm": 1.6427646078169542, "kl": 0.6672511696815491, "learning_rate": 4.101027397260274e-07, "loss": -0.0235, "step": 526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1141.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 236.9107208251953, "completions/mean_terminated_length": 236.9107208251953, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.4058573246002197, "epoch": 3.6095890410958904, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.968396634813148, "kl": 0.6565157771110535, "learning_rate": 4.0993150684931504e-07, "loss": 0.0161, "num_tokens": 8130233.0, "reward": 0.5023694634437561, "reward_std": 0.09721184521913528, "rewards/check_gptzero_func/mean": 0.5023694038391113, "rewards/check_gptzero_func/std": 0.276950865983963, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9993639588356018, "sampling/importance_sampling_ratio/min": 0.5697520971298218, "sampling/sampling_logp_difference/max": 0.8224198818206787, "sampling/sampling_logp_difference/mean": 0.01906258799135685, "step": 527 }, { "clip_ratio/high_max": 0.021439509466290474, "clip_ratio/high_mean": 0.015188291668891907, "clip_ratio/low_mean": 0.011916237883269787, "clip_ratio/low_min": 0.006269592326134443, "clip_ratio/region_mean": 0.027104530483484268, "entropy": 0.4085175395011902, "epoch": 3.616438356164384, "grad_norm": 1.6506465888953263, "kl": 0.6518436670303345, "learning_rate": 4.097602739726027e-07, "loss": 0.007, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 872.0, "completions/max_terminated_length": 872.0, "completions/mean_length": 203.23214721679688, "completions/mean_terminated_length": 203.23214721679688, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.38269349932670593, "epoch": 3.6232876712328768, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.841015693569485, "kl": 0.6829571723937988, "learning_rate": 4.095890410958904e-07, "loss": 0.0569, "num_tokens": 8146222.0, "reward": 0.6430785059928894, "reward_std": 0.060823529958724976, "rewards/check_gptzero_func/mean": 0.6430784463882446, "rewards/check_gptzero_func/std": 0.26963090896606445, "sampling/importance_sampling_ratio/max": 1.663030743598938, "sampling/importance_sampling_ratio/mean": 1.0005459785461426, "sampling/importance_sampling_ratio/min": 0.6175824999809265, "sampling/sampling_logp_difference/max": 0.5086417198181152, "sampling/sampling_logp_difference/mean": 0.017277594655752182, "step": 529 }, { "clip_ratio/high_max": 0.017864078283309937, "clip_ratio/high_mean": 0.011959263123571873, "clip_ratio/low_mean": 0.013992327265441418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.02595159038901329, "entropy": 0.38266894221305847, "epoch": 3.6301369863013697, "grad_norm": 1.6176924891416251, "kl": 0.7036113739013672, "learning_rate": 4.0941780821917805e-07, "loss": 0.0477, "step": 530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 219.62501525878906, "completions/mean_terminated_length": 219.62501525878906, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.31951504945755005, "epoch": 3.636986301369863, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.789553606828539, "kl": 0.6712629199028015, "learning_rate": 4.0924657534246575e-07, "loss": -0.0207, "num_tokens": 8163055.0, "reward": 0.5493257641792297, "reward_std": 0.07636415958404541, "rewards/check_gptzero_func/mean": 0.5493257641792297, "rewards/check_gptzero_func/std": 0.3064437508583069, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9985095262527466, "sampling/importance_sampling_ratio/min": 0.520785391330719, "sampling/sampling_logp_difference/max": 0.8178794384002686, "sampling/sampling_logp_difference/mean": 0.015959467738866806, "step": 531 }, { "clip_ratio/high_max": 0.019307589158415794, "clip_ratio/high_mean": 0.01610085926949978, "clip_ratio/low_mean": 0.014410363510251045, "clip_ratio/low_min": 0.00987432710826397, "clip_ratio/region_mean": 0.030511220917105675, "entropy": 0.3190317749977112, "epoch": 3.643835616438356, "grad_norm": 1.4008475791846378, "kl": 0.669557511806488, "learning_rate": 4.090753424657534e-07, "loss": -0.0293, "step": 532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 694.0, "completions/max_terminated_length": 694.0, "completions/mean_length": 209.1428680419922, "completions/mean_terminated_length": 209.1428680419922, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "entropy": 0.31180527806282043, "epoch": 3.6506849315068495, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.9657630044536012, "kl": 0.8122105002403259, "learning_rate": 4.089041095890411e-07, "loss": -0.0049, "num_tokens": 8180027.0, "reward": 0.49012601375579834, "reward_std": 0.055693160742521286, "rewards/check_gptzero_func/mean": 0.49012598395347595, "rewards/check_gptzero_func/std": 0.24345165491104126, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0004247426986694, "sampling/importance_sampling_ratio/min": 0.35686156153678894, "sampling/sampling_logp_difference/max": 1.030407428741455, "sampling/sampling_logp_difference/mean": 0.0171236302703619, "step": 533 }, { "clip_ratio/high_max": 0.02671118453145027, "clip_ratio/high_mean": 0.016905609518289566, "clip_ratio/low_mean": 0.013705812394618988, "clip_ratio/low_min": 0.00936454813927412, "clip_ratio/region_mean": 0.030611421912908554, "entropy": 0.31268683075904846, "epoch": 3.6575342465753424, "grad_norm": 7.333389296658405, "kl": 0.7620071768760681, "learning_rate": 4.0873287671232875e-07, "loss": -0.009, "step": 534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 243.1428680419922, "completions/mean_terminated_length": 243.1428680419922, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.43053263425827026, "epoch": 3.6643835616438354, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.8365978258418436, "kl": 0.557949423789978, "learning_rate": 4.085616438356164e-07, "loss": -0.0018, "num_tokens": 8198335.0, "reward": 0.5407435297966003, "reward_std": 0.0627339780330658, "rewards/check_gptzero_func/mean": 0.5407434701919556, "rewards/check_gptzero_func/std": 0.299507737159729, "sampling/importance_sampling_ratio/max": 1.6210367679595947, "sampling/importance_sampling_ratio/mean": 0.999654233455658, "sampling/importance_sampling_ratio/min": 0.5284030437469482, "sampling/sampling_logp_difference/max": 0.637895941734314, "sampling/sampling_logp_difference/mean": 0.019140055403113365, "step": 535 }, { "clip_ratio/high_max": 0.023354563862085342, "clip_ratio/high_mean": 0.015389936044812202, "clip_ratio/low_mean": 0.010352830402553082, "clip_ratio/low_min": 0.005651595536619425, "clip_ratio/region_mean": 0.02574276737868786, "entropy": 0.433275431394577, "epoch": 3.671232876712329, "grad_norm": 1.4277548438561278, "kl": 0.5508631467819214, "learning_rate": 4.083904109589041e-07, "loss": -0.0113, "step": 536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/max_terminated_length": 373.0, "completions/mean_length": 141.83929443359375, "completions/mean_terminated_length": 141.83929443359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.24983999133110046, "epoch": 3.678082191780822, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.254290990702725, "kl": 0.9062791466712952, "learning_rate": 4.0821917808219176e-07, "loss": 0.016, "num_tokens": 8211262.0, "reward": 0.5778093338012695, "reward_std": 0.09911978989839554, "rewards/check_gptzero_func/mean": 0.5778093338012695, "rewards/check_gptzero_func/std": 0.33692699670791626, "sampling/importance_sampling_ratio/max": 1.8316444158554077, "sampling/importance_sampling_ratio/mean": 0.9989926218986511, "sampling/importance_sampling_ratio/min": 0.32853052020072937, "sampling/sampling_logp_difference/max": 1.1131255626678467, "sampling/sampling_logp_difference/mean": 0.015056335367262363, "step": 537 }, { "clip_ratio/high_max": 0.029061103239655495, "clip_ratio/high_mean": 0.017664771527051926, "clip_ratio/low_mean": 0.019083797931671143, "clip_ratio/low_min": 0.013313609175384045, "clip_ratio/region_mean": 0.03674856945872307, "entropy": 0.24980582296848297, "epoch": 3.684931506849315, "grad_norm": 1.5833702591666279, "kl": 0.9136479496955872, "learning_rate": 4.080479452054794e-07, "loss": 0.0064, "step": 538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 930.0, "completions/max_terminated_length": 930.0, "completions/mean_length": 189.69644165039062, "completions/mean_terminated_length": 189.69644165039062, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.3418835699558258, "epoch": 3.691780821917808, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.1254604187559303, "kl": 0.7343325614929199, "learning_rate": 4.0787671232876706e-07, "loss": 0.0497, "num_tokens": 8227045.0, "reward": 0.42875826358795166, "reward_std": 0.05867968872189522, "rewards/check_gptzero_func/mean": 0.4287582039833069, "rewards/check_gptzero_func/std": 0.3596462607383728, "sampling/importance_sampling_ratio/max": 1.9876788854599, "sampling/importance_sampling_ratio/mean": 0.9999411702156067, "sampling/importance_sampling_ratio/min": 0.5038086771965027, "sampling/sampling_logp_difference/max": 0.6869676113128662, "sampling/sampling_logp_difference/mean": 0.016489019617438316, "step": 539 }, { "clip_ratio/high_max": 0.017146775498986244, "clip_ratio/high_mean": 0.013706820085644722, "clip_ratio/low_mean": 0.013988875783979893, "clip_ratio/low_min": 0.010655737482011318, "clip_ratio/region_mean": 0.02769569680094719, "entropy": 0.3419395983219147, "epoch": 3.6986301369863015, "grad_norm": 583.6908379014507, "kl": 9.426125526428223, "learning_rate": 4.077054794520548e-07, "loss": 0.1603, "step": 540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 961.0, "completions/max_terminated_length": 961.0, "completions/mean_length": 210.57144165039062, "completions/mean_terminated_length": 210.57144165039062, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.32739681005477905, "epoch": 3.7054794520547945, "frac_reward_zero_std": 0.0, "grad_norm": 1.9562116763701745, "kl": 0.7952190637588501, "learning_rate": 4.0753424657534246e-07, "loss": 0.0167, "num_tokens": 8244185.0, "reward": 0.4751635789871216, "reward_std": 0.09153632819652557, "rewards/check_gptzero_func/mean": 0.4751635491847992, "rewards/check_gptzero_func/std": 0.2708762586116791, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0007917881011963, "sampling/importance_sampling_ratio/min": 0.4954739511013031, "sampling/sampling_logp_difference/max": 0.8144292831420898, "sampling/sampling_logp_difference/mean": 0.017338264733552933, "step": 541 }, { "clip_ratio/high_max": 0.01686909608542919, "clip_ratio/high_mean": 0.014727422036230564, "clip_ratio/low_mean": 0.013090311549603939, "clip_ratio/low_min": 0.008097166195511818, "clip_ratio/region_mean": 0.027817735448479652, "entropy": 0.32957977056503296, "epoch": 3.712328767123288, "grad_norm": 1.663272428596219, "kl": 0.8114193081855774, "learning_rate": 4.073630136986301e-07, "loss": 0.0083, "step": 542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 884.0, "completions/max_terminated_length": 884.0, "completions/mean_length": 173.82144165039062, "completions/mean_terminated_length": 173.82144165039062, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.2740571200847626, "epoch": 3.719178082191781, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.2570279216978153, "kl": 0.9865518808364868, "learning_rate": 4.071917808219178e-07, "loss": 0.0026, "num_tokens": 8258967.0, "reward": 0.5885953903198242, "reward_std": 0.06375585496425629, "rewards/check_gptzero_func/mean": 0.5885953903198242, "rewards/check_gptzero_func/std": 0.28805452585220337, "sampling/importance_sampling_ratio/max": 1.9872146844863892, "sampling/importance_sampling_ratio/mean": 1.0006811618804932, "sampling/importance_sampling_ratio/min": 0.4817444086074829, "sampling/sampling_logp_difference/max": 0.7303416728973389, "sampling/sampling_logp_difference/mean": 0.015017830766737461, "step": 543 }, { "clip_ratio/high_max": 0.020809248089790344, "clip_ratio/high_mean": 0.01609078235924244, "clip_ratio/low_mean": 0.015068004839122295, "clip_ratio/low_min": 0.009253547526896, "clip_ratio/region_mean": 0.03115878812968731, "entropy": 0.2775017321109772, "epoch": 3.7260273972602738, "grad_norm": 1.9044908688662454, "kl": 0.9141503572463989, "learning_rate": 4.0702054794520547e-07, "loss": -0.0051, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 772.0, "completions/max_terminated_length": 772.0, "completions/mean_length": 165.42857360839844, "completions/mean_terminated_length": 165.42857360839844, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.27846065163612366, "epoch": 3.732876712328767, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.378855360983226, "kl": 0.830904483795166, "learning_rate": 4.068493150684931e-07, "loss": -0.0287, "num_tokens": 8272687.0, "reward": 0.5405403971672058, "reward_std": 0.1377490609884262, "rewards/check_gptzero_func/mean": 0.540540337562561, "rewards/check_gptzero_func/std": 0.326323926448822, "sampling/importance_sampling_ratio/max": 1.820396065711975, "sampling/importance_sampling_ratio/mean": 0.9997660517692566, "sampling/importance_sampling_ratio/min": 0.4469069242477417, "sampling/sampling_logp_difference/max": 0.8054050207138062, "sampling/sampling_logp_difference/mean": 0.015013833530247211, "step": 545 }, { "clip_ratio/high_max": 0.036418817937374115, "clip_ratio/high_mean": 0.019802426919341087, "clip_ratio/low_mean": 0.014047021046280861, "clip_ratio/low_min": 0.006000000052154064, "clip_ratio/region_mean": 0.03384945169091225, "entropy": 0.2776663303375244, "epoch": 3.73972602739726, "grad_norm": 1.7248844653725606, "kl": 0.8283271789550781, "learning_rate": 4.0667808219178077e-07, "loss": -0.0387, "step": 546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 609.0, "completions/max_terminated_length": 609.0, "completions/mean_length": 155.60714721679688, "completions/mean_terminated_length": 155.60714721679688, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.2744041085243225, "epoch": 3.7465753424657535, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.245810203410873, "kl": 0.7639920115470886, "learning_rate": 4.0650684931506847e-07, "loss": 0.0077, "num_tokens": 8286401.0, "reward": 0.5257667303085327, "reward_std": 0.06094924733042717, "rewards/check_gptzero_func/mean": 0.5257666707038879, "rewards/check_gptzero_func/std": 0.32959744334220886, "sampling/importance_sampling_ratio/max": 1.6370387077331543, "sampling/importance_sampling_ratio/mean": 1.0001276731491089, "sampling/importance_sampling_ratio/min": 0.20323863625526428, "sampling/sampling_logp_difference/max": 1.593374490737915, "sampling/sampling_logp_difference/mean": 0.015029184520244598, "step": 547 }, { "clip_ratio/high_max": 0.030685920268297195, "clip_ratio/high_mean": 0.017333367839455605, "clip_ratio/low_mean": 0.015855910256505013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.03318927809596062, "entropy": 0.2747938632965088, "epoch": 3.7534246575342465, "grad_norm": 1.8164174237834185, "kl": 0.7790098786354065, "learning_rate": 4.063356164383561e-07, "loss": -0.0009, "step": 548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 637.0, "completions/max_terminated_length": 637.0, "completions/mean_length": 170.9107208251953, "completions/mean_terminated_length": 170.9107208251953, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.300279438495636, "epoch": 3.76027397260274, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.19162754375583, "kl": 0.7435851097106934, "learning_rate": 4.0616438356164383e-07, "loss": -0.0107, "num_tokens": 8300620.0, "reward": 0.5291458964347839, "reward_std": 0.11349273473024368, "rewards/check_gptzero_func/mean": 0.5291458368301392, "rewards/check_gptzero_func/std": 0.3808373808860779, "sampling/importance_sampling_ratio/max": 1.6252249479293823, "sampling/importance_sampling_ratio/mean": 1.0002145767211914, "sampling/importance_sampling_ratio/min": 0.4783073365688324, "sampling/sampling_logp_difference/max": 0.737501859664917, "sampling/sampling_logp_difference/mean": 0.01527828723192215, "step": 549 }, { "clip_ratio/high_max": 0.026073619723320007, "clip_ratio/high_mean": 0.020218422636389732, "clip_ratio/low_mean": 0.015081233344972134, "clip_ratio/low_min": 0.00754497991874814, "clip_ratio/region_mean": 0.03529965505003929, "entropy": 0.3054214119911194, "epoch": 3.767123287671233, "grad_norm": 2.39758015464492, "kl": 0.7199181318283081, "learning_rate": 4.0599315068493153e-07, "loss": -0.019, "step": 550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 358.0, "completions/max_terminated_length": 358.0, "completions/mean_length": 129.625, "completions/mean_terminated_length": 129.625, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.2414800077676773, "epoch": 3.7739726027397262, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.5762804643167474, "kl": 0.982833206653595, "learning_rate": 4.058219178082192e-07, "loss": 0.0179, "num_tokens": 8312499.0, "reward": 0.6221855878829956, "reward_std": 0.0765829086303711, "rewards/check_gptzero_func/mean": 0.6221855878829956, "rewards/check_gptzero_func/std": 0.36647510528564453, "sampling/importance_sampling_ratio/max": 1.755321741104126, "sampling/importance_sampling_ratio/mean": 0.9992480874061584, "sampling/importance_sampling_ratio/min": 0.4761906862258911, "sampling/sampling_logp_difference/max": 0.7419369220733643, "sampling/sampling_logp_difference/mean": 0.014316506683826447, "step": 551 }, { "clip_ratio/high_max": 0.02961275540292263, "clip_ratio/high_mean": 0.018482549116015434, "clip_ratio/low_mean": 0.0167313814163208, "clip_ratio/low_min": 0.012858555652201176, "clip_ratio/region_mean": 0.035213928669691086, "entropy": 0.24010474979877472, "epoch": 3.780821917808219, "grad_norm": 1.8410050356779062, "kl": 0.9343231916427612, "learning_rate": 4.0565068493150683e-07, "loss": 0.0081, "step": 552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 609.0, "completions/max_terminated_length": 609.0, "completions/mean_length": 148.98214721679688, "completions/mean_terminated_length": 148.98214721679688, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.2257644385099411, "epoch": 3.787671232876712, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.1692562648828773, "kl": 0.9061035513877869, "learning_rate": 4.054794520547945e-07, "loss": 0.0218, "num_tokens": 8325686.0, "reward": 0.609175980091095, "reward_std": 0.09056154638528824, "rewards/check_gptzero_func/mean": 0.609175980091095, "rewards/check_gptzero_func/std": 0.3055764138698578, "sampling/importance_sampling_ratio/max": 1.6233638525009155, "sampling/importance_sampling_ratio/mean": 0.9995282292366028, "sampling/importance_sampling_ratio/min": 0.5985397696495056, "sampling/sampling_logp_difference/max": 0.5132622718811035, "sampling/sampling_logp_difference/mean": 0.013216841034591198, "step": 553 }, { "clip_ratio/high_max": 0.0257037952542305, "clip_ratio/high_mean": 0.01780097372829914, "clip_ratio/low_mean": 0.013116040267050266, "clip_ratio/low_min": 0.005973715800791979, "clip_ratio/region_mean": 0.030917014926671982, "entropy": 0.22691497206687927, "epoch": 3.7945205479452055, "grad_norm": 1.6252494576315173, "kl": 0.891621470451355, "learning_rate": 4.053082191780822e-07, "loss": 0.0125, "step": 554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 835.0, "completions/max_terminated_length": 835.0, "completions/mean_length": 185.07144165039062, "completions/mean_terminated_length": 185.07144165039062, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.3294243812561035, "epoch": 3.8013698630136985, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.4931418589997785, "kl": 0.7567666172981262, "learning_rate": 4.0513698630136983e-07, "loss": 0.0161, "num_tokens": 8341338.0, "reward": 0.6169933676719666, "reward_std": 0.11189045011997223, "rewards/check_gptzero_func/mean": 0.6169933676719666, "rewards/check_gptzero_func/std": 0.31206947565078735, "sampling/importance_sampling_ratio/max": 1.942894458770752, "sampling/importance_sampling_ratio/mean": 1.0000698566436768, "sampling/importance_sampling_ratio/min": 0.5362991094589233, "sampling/sampling_logp_difference/max": 0.6641788482666016, "sampling/sampling_logp_difference/mean": 0.01656663790345192, "step": 555 }, { "clip_ratio/high_max": 0.021459227427840233, "clip_ratio/high_mean": 0.013903082348406315, "clip_ratio/low_mean": 0.009945661760866642, "clip_ratio/low_min": 0.005740527994930744, "clip_ratio/region_mean": 0.023848745971918106, "entropy": 0.33020147681236267, "epoch": 3.808219178082192, "grad_norm": 2.1759837231867163, "kl": 0.7248051762580872, "learning_rate": 4.049657534246575e-07, "loss": 0.0074, "step": 556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 972.0, "completions/max_terminated_length": 972.0, "completions/mean_length": 218.71429443359375, "completions/mean_terminated_length": 218.71429443359375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.4552774429321289, "epoch": 3.815068493150685, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.0074343481093537, "kl": 0.6881422400474548, "learning_rate": 4.0479452054794514e-07, "loss": -0.0042, "num_tokens": 8358214.0, "reward": 0.5017450451850891, "reward_std": 0.049918875098228455, "rewards/check_gptzero_func/mean": 0.5017450451850891, "rewards/check_gptzero_func/std": 0.3204118013381958, "sampling/importance_sampling_ratio/max": 1.6976975202560425, "sampling/importance_sampling_ratio/mean": 1.0003818273544312, "sampling/importance_sampling_ratio/min": 0.38810789585113525, "sampling/sampling_logp_difference/max": 0.9464719295501709, "sampling/sampling_logp_difference/mean": 0.019959568977355957, "step": 557 }, { "clip_ratio/high_max": 0.016791045665740967, "clip_ratio/high_mean": 0.014338294975459576, "clip_ratio/low_mean": 0.013898683711886406, "clip_ratio/low_min": 0.008645533584058285, "clip_ratio/region_mean": 0.028236975893378258, "entropy": 0.4549736976623535, "epoch": 3.821917808219178, "grad_norm": 2.207584566008428, "kl": 0.7178505659103394, "learning_rate": 4.046232876712329e-07, "loss": -0.0145, "step": 558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1996.0, "completions/max_terminated_length": 1996.0, "completions/mean_length": 254.7857208251953, "completions/mean_terminated_length": 254.7857208251953, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.3577474057674408, "epoch": 3.828767123287671, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.168804349882595, "kl": 0.8509775996208191, "learning_rate": 4.0445205479452054e-07, "loss": 0.035, "num_tokens": 8377072.0, "reward": 0.5602884292602539, "reward_std": 0.07809793204069138, "rewards/check_gptzero_func/mean": 0.5602884292602539, "rewards/check_gptzero_func/std": 0.33807671070098877, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000103235244751, "sampling/importance_sampling_ratio/min": 0.5292015671730042, "sampling/sampling_logp_difference/max": 0.7054338455200195, "sampling/sampling_logp_difference/mean": 0.016431018710136414, "step": 559 }, { "clip_ratio/high_max": 0.021113242954015732, "clip_ratio/high_mean": 0.01141637284308672, "clip_ratio/low_mean": 0.010099702514708042, "clip_ratio/low_min": 0.004830917809158564, "clip_ratio/region_mean": 0.02151607535779476, "entropy": 0.3589823842048645, "epoch": 3.8356164383561646, "grad_norm": 1.4927097503964064, "kl": 0.8559781908988953, "learning_rate": 4.042808219178082e-07, "loss": 0.0263, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 155.85714721679688, "completions/mean_terminated_length": 155.85714721679688, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.28403568267822266, "epoch": 3.8424657534246576, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.0697836074113365, "kl": 0.780699610710144, "learning_rate": 4.041095890410959e-07, "loss": -0.009, "num_tokens": 8390906.0, "reward": 0.6176922917366028, "reward_std": 0.04789727181196213, "rewards/check_gptzero_func/mean": 0.6176922917366028, "rewards/check_gptzero_func/std": 0.37872314453125, "sampling/importance_sampling_ratio/max": 1.5469024181365967, "sampling/importance_sampling_ratio/mean": 1.0002381801605225, "sampling/importance_sampling_ratio/min": 0.35319802165031433, "sampling/sampling_logp_difference/max": 1.0407264232635498, "sampling/sampling_logp_difference/mean": 0.01467388216406107, "step": 561 }, { "clip_ratio/high_max": 0.0345982126891613, "clip_ratio/high_mean": 0.018956124782562256, "clip_ratio/low_mean": 0.014527380466461182, "clip_ratio/low_min": 0.00477326987311244, "clip_ratio/region_mean": 0.03348350524902344, "entropy": 0.2868184447288513, "epoch": 3.8493150684931505, "grad_norm": 1.9627665568717874, "kl": 0.757611095905304, "learning_rate": 4.0393835616438355e-07, "loss": -0.0181, "step": 562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 830.0, "completions/max_terminated_length": 830.0, "completions/mean_length": 166.125, "completions/mean_terminated_length": 166.125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.30339884757995605, "epoch": 3.856164383561644, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.3943841920153814, "kl": 0.8154360055923462, "learning_rate": 4.037671232876712e-07, "loss": 0.0037, "num_tokens": 8404887.0, "reward": 0.5760464072227478, "reward_std": 0.08495550602674484, "rewards/check_gptzero_func/mean": 0.5760464072227478, "rewards/check_gptzero_func/std": 0.2837744951248169, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9990900754928589, "sampling/importance_sampling_ratio/min": 0.4323926568031311, "sampling/sampling_logp_difference/max": 0.8384212255477905, "sampling/sampling_logp_difference/mean": 0.0163765586912632, "step": 563 }, { "clip_ratio/high_max": 0.021939953789114952, "clip_ratio/high_mean": 0.015789184719324112, "clip_ratio/low_mean": 0.01704300381243229, "clip_ratio/low_min": 0.009600000455975533, "clip_ratio/region_mean": 0.03283218666911125, "entropy": 0.29969915747642517, "epoch": 3.863013698630137, "grad_norm": 2.4028356802419677, "kl": 0.8325408101081848, "learning_rate": 4.0359589041095885e-07, "loss": -0.0051, "step": 564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 980.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 240.75001525878906, "completions/mean_terminated_length": 240.75001525878906, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.41048547625541687, "epoch": 3.8698630136986303, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.8055626141476115, "kl": 0.6286023259162903, "learning_rate": 4.0342465753424655e-07, "loss": -0.0158, "num_tokens": 8422787.0, "reward": 0.6570413708686829, "reward_std": 0.07357772439718246, "rewards/check_gptzero_func/mean": 0.6570413708686829, "rewards/check_gptzero_func/std": 0.3348970413208008, "sampling/importance_sampling_ratio/max": 1.9776203632354736, "sampling/importance_sampling_ratio/mean": 0.9997029900550842, "sampling/importance_sampling_ratio/min": 0.4703739583492279, "sampling/sampling_logp_difference/max": 0.7542272806167603, "sampling/sampling_logp_difference/mean": 0.01804528199136257, "step": 565 }, { "clip_ratio/high_max": 0.02390710450708866, "clip_ratio/high_mean": 0.016406672075390816, "clip_ratio/low_mean": 0.01379375346004963, "clip_ratio/low_min": 0.00599700165912509, "clip_ratio/region_mean": 0.030200425535440445, "entropy": 0.40982022881507874, "epoch": 3.8767123287671232, "grad_norm": 1.4569275354784625, "kl": 0.6372540593147278, "learning_rate": 4.032534246575342e-07, "loss": -0.0249, "step": 566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 909.0, "completions/max_terminated_length": 909.0, "completions/mean_length": 217.55357360839844, "completions/mean_terminated_length": 217.55357360839844, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.34525227546691895, "epoch": 3.883561643835616, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.004228499735561, "kl": 0.609760582447052, "learning_rate": 4.030821917808219e-07, "loss": -0.0245, "num_tokens": 8440224.0, "reward": 0.6827398538589478, "reward_std": 0.10030685365200043, "rewards/check_gptzero_func/mean": 0.682739794254303, "rewards/check_gptzero_func/std": 0.29198065400123596, "sampling/importance_sampling_ratio/max": 1.5520037412643433, "sampling/importance_sampling_ratio/mean": 0.9998765587806702, "sampling/importance_sampling_ratio/min": 0.467472106218338, "sampling/sampling_logp_difference/max": 0.7604155540466309, "sampling/sampling_logp_difference/mean": 0.01629447750747204, "step": 567 }, { "clip_ratio/high_max": 0.022346368059515953, "clip_ratio/high_mean": 0.014960448257625103, "clip_ratio/low_mean": 0.013725915923714638, "clip_ratio/low_min": 0.007844947278499603, "clip_ratio/region_mean": 0.028686365112662315, "entropy": 0.3448318839073181, "epoch": 3.8904109589041096, "grad_norm": 35.96186741631496, "kl": 1.2430521249771118, "learning_rate": 4.029109589041096e-07, "loss": -0.0279, "step": 568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 839.0, "completions/max_terminated_length": 839.0, "completions/mean_length": 214.6428680419922, "completions/mean_terminated_length": 214.6428680419922, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.3400370180606842, "epoch": 3.897260273972603, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9868557596360465, "kl": 0.7886174321174622, "learning_rate": 4.0273972602739726e-07, "loss": -0.0094, "num_tokens": 8457176.0, "reward": 0.5643998384475708, "reward_std": 0.052447132766246796, "rewards/check_gptzero_func/mean": 0.564399778842926, "rewards/check_gptzero_func/std": 0.2753923237323761, "sampling/importance_sampling_ratio/max": 1.7801809310913086, "sampling/importance_sampling_ratio/mean": 1.000014066696167, "sampling/importance_sampling_ratio/min": 0.34282323718070984, "sampling/sampling_logp_difference/max": 1.0705403089523315, "sampling/sampling_logp_difference/mean": 0.017226794734597206, "step": 569 }, { "clip_ratio/high_max": 0.021662604063749313, "clip_ratio/high_mean": 0.015255166217684746, "clip_ratio/low_mean": 0.015663394704461098, "clip_ratio/low_min": 0.010147133842110634, "clip_ratio/region_mean": 0.030918559059500694, "entropy": 0.3393872082233429, "epoch": 3.904109589041096, "grad_norm": 1.4969857375040094, "kl": 0.7802913784980774, "learning_rate": 4.025684931506849e-07, "loss": -0.019, "step": 570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1028.0, "completions/max_terminated_length": 1028.0, "completions/mean_length": 185.46429443359375, "completions/mean_terminated_length": 185.46429443359375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.2858924865722656, "epoch": 3.910958904109589, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.10912252195914, "kl": 0.6171944737434387, "learning_rate": 4.0239726027397256e-07, "loss": -0.0138, "num_tokens": 8472358.0, "reward": 0.5158013701438904, "reward_std": 0.05453689768910408, "rewards/check_gptzero_func/mean": 0.5158013701438904, "rewards/check_gptzero_func/std": 0.4087916314601898, "sampling/importance_sampling_ratio/max": 1.5864301919937134, "sampling/importance_sampling_ratio/mean": 1.0000708103179932, "sampling/importance_sampling_ratio/min": 0.6271357536315918, "sampling/sampling_logp_difference/max": 0.46659231185913086, "sampling/sampling_logp_difference/mean": 0.014463403262197971, "step": 571 }, { "clip_ratio/high_max": 0.0282326377928257, "clip_ratio/high_mean": 0.016498800367116928, "clip_ratio/low_mean": 0.013513656333088875, "clip_ratio/low_min": 0.008652657270431519, "clip_ratio/region_mean": 0.030012454837560654, "entropy": 0.289556086063385, "epoch": 3.9178082191780823, "grad_norm": 1.8889463016281476, "kl": 0.6025843024253845, "learning_rate": 4.0222602739726026e-07, "loss": -0.0227, "step": 572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 680.0, "completions/max_terminated_length": 680.0, "completions/mean_length": 171.10714721679688, "completions/mean_terminated_length": 171.10714721679688, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.33099207282066345, "epoch": 3.9246575342465753, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.3647520333941916, "kl": 0.9193627238273621, "learning_rate": 4.020547945205479e-07, "loss": -0.0036, "num_tokens": 8486648.0, "reward": 0.6107355952262878, "reward_std": 0.08580674976110458, "rewards/check_gptzero_func/mean": 0.6107355356216431, "rewards/check_gptzero_func/std": 0.3358237147331238, "sampling/importance_sampling_ratio/max": 1.608952522277832, "sampling/importance_sampling_ratio/mean": 0.9990447759628296, "sampling/importance_sampling_ratio/min": 0.4440855383872986, "sampling/sampling_logp_difference/max": 0.811738133430481, "sampling/sampling_logp_difference/mean": 0.01763845607638359, "step": 573 }, { "clip_ratio/high_max": 0.02979515865445137, "clip_ratio/high_mean": 0.02109667845070362, "clip_ratio/low_mean": 0.015201778151094913, "clip_ratio/low_min": 0.010189228691160679, "clip_ratio/region_mean": 0.03629845753312111, "entropy": 0.3309042155742645, "epoch": 3.9315068493150687, "grad_norm": 1.9662034240336035, "kl": 0.8885659575462341, "learning_rate": 4.0188356164383556e-07, "loss": -0.0142, "step": 574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1063.0, "completions/max_terminated_length": 1063.0, "completions/mean_length": 196.87501525878906, "completions/mean_terminated_length": 196.87501525878906, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.3529164791107178, "epoch": 3.9383561643835616, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.113064554714409, "kl": 0.8383173942565918, "learning_rate": 4.0171232876712327e-07, "loss": 0.0277, "num_tokens": 8502899.0, "reward": 0.5403333306312561, "reward_std": 0.05231210216879845, "rewards/check_gptzero_func/mean": 0.5403333306312561, "rewards/check_gptzero_func/std": 0.3174295425415039, "sampling/importance_sampling_ratio/max": 1.6201753616333008, "sampling/importance_sampling_ratio/mean": 0.9999723434448242, "sampling/importance_sampling_ratio/min": 0.6319674253463745, "sampling/sampling_logp_difference/max": 0.48253440856933594, "sampling/sampling_logp_difference/mean": 0.016092976555228233, "step": 575 }, { "clip_ratio/high_max": 0.042424242943525314, "clip_ratio/high_mean": 0.0165067408233881, "clip_ratio/low_mean": 0.014563515782356262, "clip_ratio/low_min": 0.007054673507809639, "clip_ratio/region_mean": 0.031070252880454063, "entropy": 0.3555203974246979, "epoch": 3.9452054794520546, "grad_norm": 1.9089102406696428, "kl": 0.816659152507782, "learning_rate": 4.0154109589041097e-07, "loss": 0.0188, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 880.0, "completions/max_terminated_length": 880.0, "completions/mean_length": 177.1428680419922, "completions/mean_terminated_length": 177.1428680419922, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.3281347453594208, "epoch": 3.952054794520548, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.194447147024982, "kl": 0.8596234917640686, "learning_rate": 4.013698630136986e-07, "loss": 0.0685, "num_tokens": 8517447.0, "reward": 0.5280500650405884, "reward_std": 0.07808997482061386, "rewards/check_gptzero_func/mean": 0.5280500054359436, "rewards/check_gptzero_func/std": 0.3433384597301483, "sampling/importance_sampling_ratio/max": 1.5971788167953491, "sampling/importance_sampling_ratio/mean": 0.9996266961097717, "sampling/importance_sampling_ratio/min": 0.6298381686210632, "sampling/sampling_logp_difference/max": 0.46823883056640625, "sampling/sampling_logp_difference/mean": 0.016426322981715202, "step": 577 }, { "clip_ratio/high_max": 0.024147726595401764, "clip_ratio/high_mean": 0.016906308010220528, "clip_ratio/low_mean": 0.015578783117234707, "clip_ratio/low_min": 0.009845288470387459, "clip_ratio/region_mean": 0.03248509392142296, "entropy": 0.32763639092445374, "epoch": 3.958904109589041, "grad_norm": 1.812263597179838, "kl": 0.85824054479599, "learning_rate": 4.0119863013698627e-07, "loss": 0.0588, "step": 578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 913.0, "completions/max_terminated_length": 913.0, "completions/mean_length": 176.35714721679688, "completions/mean_terminated_length": 176.35714721679688, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.3249521255493164, "epoch": 3.9657534246575343, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.277804294549487, "kl": 0.8372082710266113, "learning_rate": 4.01027397260274e-07, "loss": 0.0191, "num_tokens": 8532473.0, "reward": 0.5246092081069946, "reward_std": 0.061752695590257645, "rewards/check_gptzero_func/mean": 0.5246091485023499, "rewards/check_gptzero_func/std": 0.3263891339302063, "sampling/importance_sampling_ratio/max": 1.8154503107070923, "sampling/importance_sampling_ratio/mean": 1.000023603439331, "sampling/importance_sampling_ratio/min": 0.628705620765686, "sampling/sampling_logp_difference/max": 0.5963335037231445, "sampling/sampling_logp_difference/mean": 0.015520088374614716, "step": 579 }, { "clip_ratio/high_max": 0.021812893450260162, "clip_ratio/high_mean": 0.01572697050869465, "clip_ratio/low_mean": 0.015419913455843925, "clip_ratio/low_min": 0.00826446246355772, "clip_ratio/region_mean": 0.031146883964538574, "entropy": 0.3236038386821747, "epoch": 3.9726027397260273, "grad_norm": 2.0680347375139103, "kl": 0.8532201051712036, "learning_rate": 4.008561643835616e-07, "loss": 0.0095, "step": 580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1163.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 303.375, "completions/mean_terminated_length": 303.375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "entropy": 0.44928455352783203, "epoch": 3.9794520547945207, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8678843287169933, "kl": 0.7124039530754089, "learning_rate": 4.006849315068493e-07, "loss": 0.0229, "num_tokens": 8554298.0, "reward": 0.5999687910079956, "reward_std": 0.08071979880332947, "rewards/check_gptzero_func/mean": 0.5999687910079956, "rewards/check_gptzero_func/std": 0.27428412437438965, "sampling/importance_sampling_ratio/max": 1.6199119091033936, "sampling/importance_sampling_ratio/mean": 0.9998503923416138, "sampling/importance_sampling_ratio/min": 0.4881192743778229, "sampling/sampling_logp_difference/max": 0.7171955108642578, "sampling/sampling_logp_difference/mean": 0.018967991694808006, "step": 581 }, { "clip_ratio/high_max": 0.01653868891298771, "clip_ratio/high_mean": 0.01376347430050373, "clip_ratio/low_mean": 0.010272788815200329, "clip_ratio/low_min": 0.0060606058686971664, "clip_ratio/region_mean": 0.024036262184381485, "entropy": 0.45214876532554626, "epoch": 3.9863013698630136, "grad_norm": 1.4085233902534684, "kl": 0.6808921098709106, "learning_rate": 4.00513698630137e-07, "loss": 0.0138, "step": 582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 905.0, "completions/max_terminated_length": 905.0, "completions/mean_length": 163.94644165039062, "completions/mean_terminated_length": 163.94644165039062, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.3295195698738098, "epoch": 3.993150684931507, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.0301052051703268, "kl": 0.8219018578529358, "learning_rate": 4.0034246575342463e-07, "loss": 0.0162, "num_tokens": 8568577.0, "reward": 0.7003747820854187, "reward_std": 0.059745609760284424, "rewards/check_gptzero_func/mean": 0.7003747224807739, "rewards/check_gptzero_func/std": 0.3077493906021118, "sampling/importance_sampling_ratio/max": 1.6598824262619019, "sampling/importance_sampling_ratio/mean": 0.9995865225791931, "sampling/importance_sampling_ratio/min": 0.5370743274688721, "sampling/sampling_logp_difference/max": 0.6216187477111816, "sampling/sampling_logp_difference/mean": 0.015437252819538116, "step": 583 }, { "clip_ratio/high_max": 0.023429179564118385, "clip_ratio/high_mean": 0.0157716553658247, "clip_ratio/low_mean": 0.014402217231690884, "clip_ratio/low_min": 0.0028763182926923037, "clip_ratio/region_mean": 0.03017387166619301, "entropy": 0.3277117908000946, "epoch": 4.0, "grad_norm": 1.5802269149869654, "kl": 0.8372132182121277, "learning_rate": 4.001712328767123e-07, "loss": 0.0066, "step": 584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 641.0, "completions/max_terminated_length": 641.0, "completions/mean_length": 143.19644165039062, "completions/mean_terminated_length": 143.19644165039062, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.2576902210712433, "epoch": 4.006849315068493, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.510284493896427, "kl": 1.0463067293167114, "learning_rate": 4e-07, "loss": 0.0006, "num_tokens": 8581288.0, "reward": 0.5901493430137634, "reward_std": 0.09005912393331528, "rewards/check_gptzero_func/mean": 0.5901493430137634, "rewards/check_gptzero_func/std": 0.31472718715667725, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0014638900756836, "sampling/importance_sampling_ratio/min": 0.627916693687439, "sampling/sampling_logp_difference/max": 0.8589162826538086, "sampling/sampling_logp_difference/mean": 0.014362276531755924, "step": 585 }, { "clip_ratio/high_max": 0.02365308813750744, "clip_ratio/high_mean": 0.016881437972187996, "clip_ratio/low_mean": 0.01206466369330883, "clip_ratio/low_min": 0.005479452200233936, "clip_ratio/region_mean": 0.028946101665496826, "entropy": 0.2570420801639557, "epoch": 4.013698630136986, "grad_norm": 1.8928655493, "kl": 0.9965308308601379, "learning_rate": 3.998287671232877e-07, "loss": -0.009, "step": 586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1054.0, "completions/max_terminated_length": 1054.0, "completions/mean_length": 115.0714340209961, "completions/mean_terminated_length": 115.0714340209961, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.2930266857147217, "epoch": 4.02054794520548, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.8247864951703727, "kl": 1.0817126035690308, "learning_rate": 3.9965753424657534e-07, "loss": -0.0238, "num_tokens": 8592198.0, "reward": 0.6312528848648071, "reward_std": 0.0636180192232132, "rewards/check_gptzero_func/mean": 0.6312528252601624, "rewards/check_gptzero_func/std": 0.35652732849121094, "sampling/importance_sampling_ratio/max": 1.766697883605957, "sampling/importance_sampling_ratio/mean": 1.0009968280792236, "sampling/importance_sampling_ratio/min": 0.6505504846572876, "sampling/sampling_logp_difference/max": 0.5691121816635132, "sampling/sampling_logp_difference/mean": 0.014180822297930717, "step": 587 }, { "clip_ratio/high_max": 0.028423773124814034, "clip_ratio/high_mean": 0.020665010437369347, "clip_ratio/low_mean": 0.021175917237997055, "clip_ratio/low_min": 0.01044386439025402, "clip_ratio/region_mean": 0.04184092953801155, "entropy": 0.29430750012397766, "epoch": 4.027397260273973, "grad_norm": 1.9057727031962584, "kl": 1.0336940288543701, "learning_rate": 3.99486301369863e-07, "loss": -0.0346, "step": 588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 921.0, "completions/max_terminated_length": 921.0, "completions/mean_length": 182.71429443359375, "completions/mean_terminated_length": 182.71429443359375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.2977619171142578, "epoch": 4.034246575342466, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.157818492443765, "kl": 0.931157112121582, "learning_rate": 3.993150684931507e-07, "loss": 0.0058, "num_tokens": 8607484.0, "reward": 0.6185086965560913, "reward_std": 0.0887114554643631, "rewards/check_gptzero_func/mean": 0.6185086965560913, "rewards/check_gptzero_func/std": 0.3233037292957306, "sampling/importance_sampling_ratio/max": 1.4787092208862305, "sampling/importance_sampling_ratio/mean": 0.9997347593307495, "sampling/importance_sampling_ratio/min": 0.4309251606464386, "sampling/sampling_logp_difference/max": 0.8418208360671997, "sampling/sampling_logp_difference/mean": 0.014642903581261635, "step": 589 }, { "clip_ratio/high_max": 0.01965317875146866, "clip_ratio/high_mean": 0.012830346822738647, "clip_ratio/low_mean": 0.01343586202710867, "clip_ratio/low_min": 0.0032573288772255182, "clip_ratio/region_mean": 0.026266207918524742, "entropy": 0.2975466847419739, "epoch": 4.041095890410959, "grad_norm": 1.6629063572160336, "kl": 0.935541033744812, "learning_rate": 3.9914383561643834e-07, "loss": -0.0045, "step": 590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 700.0, "completions/max_terminated_length": 700.0, "completions/mean_length": 150.17857360839844, "completions/mean_terminated_length": 150.17857360839844, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.2860097289085388, "epoch": 4.0479452054794525, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.222539814740797, "kl": 0.7601937651634216, "learning_rate": 3.98972602739726e-07, "loss": 0.0154, "num_tokens": 8621144.0, "reward": 0.6835178732872009, "reward_std": 0.08791854232549667, "rewards/check_gptzero_func/mean": 0.6835178732872009, "rewards/check_gptzero_func/std": 0.35666167736053467, "sampling/importance_sampling_ratio/max": 1.7486625909805298, "sampling/importance_sampling_ratio/mean": 1.0001776218414307, "sampling/importance_sampling_ratio/min": 0.5409767627716064, "sampling/sampling_logp_difference/max": 0.6143789291381836, "sampling/sampling_logp_difference/mean": 0.013321108184754848, "step": 591 }, { "clip_ratio/high_max": 0.02931937202811241, "clip_ratio/high_mean": 0.016402089968323708, "clip_ratio/low_mean": 0.013485140167176723, "clip_ratio/low_min": 0.008016032166779041, "clip_ratio/region_mean": 0.029887229204177856, "entropy": 0.28553324937820435, "epoch": 4.054794520547945, "grad_norm": 1.6682202192842706, "kl": 0.7567993998527527, "learning_rate": 3.9880136986301364e-07, "loss": 0.006, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 745.0, "completions/max_terminated_length": 745.0, "completions/mean_length": 129.42857360839844, "completions/mean_terminated_length": 129.42857360839844, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.23775196075439453, "epoch": 4.061643835616438, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 54.73718326513479, "kl": 2.2382564544677734, "learning_rate": 3.9863013698630134e-07, "loss": 0.0384, "num_tokens": 8633472.0, "reward": 0.6668751239776611, "reward_std": 0.12801453471183777, "rewards/check_gptzero_func/mean": 0.6668750643730164, "rewards/check_gptzero_func/std": 0.33868077397346497, "sampling/importance_sampling_ratio/max": 1.8549941778182983, "sampling/importance_sampling_ratio/mean": 1.001452088356018, "sampling/importance_sampling_ratio/min": 0.6065531969070435, "sampling/sampling_logp_difference/max": 0.6178815364837646, "sampling/sampling_logp_difference/mean": 0.013273901306092739, "step": 593 }, { "clip_ratio/high_max": 0.009433962404727936, "clip_ratio/high_mean": 0.004334976430982351, "clip_ratio/low_mean": 0.008976205252110958, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013311182148754597, "entropy": 0.24442194402217865, "epoch": 4.068493150684931, "grad_norm": 21.49502964400735, "kl": 1.9511362314224243, "learning_rate": 3.9845890410958905e-07, "loss": 0.0373, "step": 594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 549.0, "completions/max_terminated_length": 549.0, "completions/mean_length": 129.125, "completions/mean_terminated_length": 129.125, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.27286496758461, "epoch": 4.075342465753424, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.3497117410140285, "kl": 0.9161044359207153, "learning_rate": 3.982876712328767e-07, "loss": 0.0514, "num_tokens": 8645447.0, "reward": 0.6144685745239258, "reward_std": 0.07668539881706238, "rewards/check_gptzero_func/mean": 0.6144685745239258, "rewards/check_gptzero_func/std": 0.36693912744522095, "sampling/importance_sampling_ratio/max": 1.5741389989852905, "sampling/importance_sampling_ratio/mean": 1.0008724927902222, "sampling/importance_sampling_ratio/min": 0.5359940528869629, "sampling/sampling_logp_difference/max": 0.6236321926116943, "sampling/sampling_logp_difference/mean": 0.014188284054398537, "step": 595 }, { "clip_ratio/high_max": 0.02697095461189747, "clip_ratio/high_mean": 0.01798638515174389, "clip_ratio/low_mean": 0.014321297407150269, "clip_ratio/low_min": 0.002739726100116968, "clip_ratio/region_mean": 0.03230768069624901, "entropy": 0.2742673456668854, "epoch": 4.082191780821918, "grad_norm": 1.6675176778727714, "kl": 0.9118481278419495, "learning_rate": 3.981164383561644e-07, "loss": 0.0405, "step": 596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 589.0, "completions/max_terminated_length": 589.0, "completions/mean_length": 122.08929443359375, "completions/mean_terminated_length": 122.08929443359375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.3134656846523285, "epoch": 4.089041095890411, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.5998324881995276, "kl": 0.86316978931427, "learning_rate": 3.9794520547945205e-07, "loss": 0.0213, "num_tokens": 8657172.0, "reward": 0.6038450002670288, "reward_std": 0.0815328061580658, "rewards/check_gptzero_func/mean": 0.6038450002670288, "rewards/check_gptzero_func/std": 0.37845104932785034, "sampling/importance_sampling_ratio/max": 1.7537422180175781, "sampling/importance_sampling_ratio/mean": 0.9996910095214844, "sampling/importance_sampling_ratio/min": 0.23687830567359924, "sampling/sampling_logp_difference/max": 1.4402087926864624, "sampling/sampling_logp_difference/mean": 0.015933839604258537, "step": 597 }, { "clip_ratio/high_max": 0.028824834153056145, "clip_ratio/high_mean": 0.01933773048222065, "clip_ratio/low_mean": 0.01913398876786232, "clip_ratio/low_min": 0.013530135154724121, "clip_ratio/region_mean": 0.03847172111272812, "entropy": 0.31223440170288086, "epoch": 4.095890410958904, "grad_norm": 1.7211116984277235, "kl": 0.8670538663864136, "learning_rate": 3.977739726027397e-07, "loss": 0.0108, "step": 598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 730.0, "completions/max_terminated_length": 730.0, "completions/mean_length": 114.92857360839844, "completions/mean_terminated_length": 114.92857360839844, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.26103541254997253, "epoch": 4.102739726027397, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.545465040328867, "kl": 0.8923487067222595, "learning_rate": 3.9760273972602735e-07, "loss": -0.0065, "num_tokens": 8668216.0, "reward": 0.640670657157898, "reward_std": 0.11108281463384628, "rewards/check_gptzero_func/mean": 0.640670657157898, "rewards/check_gptzero_func/std": 0.3591608703136444, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996110796928406, "sampling/importance_sampling_ratio/min": 0.6169658303260803, "sampling/sampling_logp_difference/max": 0.8145751953125, "sampling/sampling_logp_difference/mean": 0.013716030865907669, "step": 599 }, { "clip_ratio/high_max": 0.0235294122248888, "clip_ratio/high_mean": 0.01951347105205059, "clip_ratio/low_mean": 0.021093768998980522, "clip_ratio/low_min": 0.01163918524980545, "clip_ratio/region_mean": 0.04060724005103111, "entropy": 0.2576715350151062, "epoch": 4.109589041095891, "grad_norm": 1.9476222639226028, "kl": 0.9333817362785339, "learning_rate": 3.9743150684931506e-07, "loss": -0.0156, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 838.0, "completions/max_terminated_length": 838.0, "completions/mean_length": 121.08929443359375, "completions/mean_terminated_length": 121.08929443359375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.2570456862449646, "epoch": 4.116438356164384, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.726726543074514, "kl": 1.174784541130066, "learning_rate": 3.972602739726027e-07, "loss": -0.0244, "num_tokens": 8679845.0, "reward": 0.5415670871734619, "reward_std": 0.09136220812797546, "rewards/check_gptzero_func/mean": 0.5415670871734619, "rewards/check_gptzero_func/std": 0.36906707286834717, "sampling/importance_sampling_ratio/max": 1.667930006980896, "sampling/importance_sampling_ratio/mean": 1.0004194974899292, "sampling/importance_sampling_ratio/min": 0.5321838855743408, "sampling/sampling_logp_difference/max": 0.6307662725448608, "sampling/sampling_logp_difference/mean": 0.014991969801485538, "step": 601 }, { "clip_ratio/high_max": 0.029335634782910347, "clip_ratio/high_mean": 0.021533383056521416, "clip_ratio/low_mean": 0.01199879590421915, "clip_ratio/low_min": 0.006430868059396744, "clip_ratio/region_mean": 0.03353217989206314, "entropy": 0.25913313031196594, "epoch": 4.123287671232877, "grad_norm": 2.522546842779903, "kl": 1.0365461111068726, "learning_rate": 3.970890410958904e-07, "loss": -0.0345, "step": 602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1040.0, "completions/max_terminated_length": 1040.0, "completions/mean_length": 176.44644165039062, "completions/mean_terminated_length": 176.44644165039062, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.35088637471199036, "epoch": 4.13013698630137, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.9783028586781923, "kl": 0.8905962705612183, "learning_rate": 3.9691780821917806e-07, "loss": 0.0071, "num_tokens": 8694646.0, "reward": 0.5755389928817749, "reward_std": 0.09073697030544281, "rewards/check_gptzero_func/mean": 0.5755389928817749, "rewards/check_gptzero_func/std": 0.30523478984832764, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9995762705802917, "sampling/importance_sampling_ratio/min": 0.48490577936172485, "sampling/sampling_logp_difference/max": 0.7254230976104736, "sampling/sampling_logp_difference/mean": 0.016057446599006653, "step": 603 }, { "clip_ratio/high_max": 0.02166065014898777, "clip_ratio/high_mean": 0.014883080497384071, "clip_ratio/low_mean": 0.013534466736018658, "clip_ratio/low_min": 0.009927798062562943, "clip_ratio/region_mean": 0.028417548164725304, "entropy": 0.35072967410087585, "epoch": 4.136986301369863, "grad_norm": 1.4840110824543038, "kl": 0.8412100672721863, "learning_rate": 3.9674657534246576e-07, "loss": -0.0029, "step": 604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 714.0, "completions/max_terminated_length": 714.0, "completions/mean_length": 136.35714721679688, "completions/mean_terminated_length": 136.35714721679688, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.21569357812404633, "epoch": 4.1438356164383565, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 24.70484198298594, "kl": 2.805986166000366, "learning_rate": 3.965753424657534e-07, "loss": 0.0448, "num_tokens": 8707150.0, "reward": 0.6251951456069946, "reward_std": 0.11350912600755692, "rewards/check_gptzero_func/mean": 0.6251950860023499, "rewards/check_gptzero_func/std": 0.3226304054260254, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996946454048157, "sampling/importance_sampling_ratio/min": 0.589383065700531, "sampling/sampling_logp_difference/max": 0.7049291133880615, "sampling/sampling_logp_difference/mean": 0.013216568157076836, "step": 605 }, { "clip_ratio/high_max": 0.006903353147208691, "clip_ratio/high_mean": 0.005066410638391972, "clip_ratio/low_mean": 0.005047682207077742, "clip_ratio/low_min": 0.0005712653510272503, "clip_ratio/region_mean": 0.010114093311131, "entropy": 0.2162623405456543, "epoch": 4.1506849315068495, "grad_norm": 2.3364537483459205, "kl": 1.1484240293502808, "learning_rate": 3.9640410958904106e-07, "loss": 0.028, "step": 606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 992.0, "completions/max_terminated_length": 992.0, "completions/mean_length": 195.8928680419922, "completions/mean_terminated_length": 195.8928680419922, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.39961618185043335, "epoch": 4.157534246575342, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.076319341442233, "kl": 0.7662903666496277, "learning_rate": 3.9623287671232877e-07, "loss": 0.0189, "num_tokens": 8723210.0, "reward": 0.6608148813247681, "reward_std": 0.062077999114990234, "rewards/check_gptzero_func/mean": 0.6608148217201233, "rewards/check_gptzero_func/std": 0.2868780493736267, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000377893447876, "sampling/importance_sampling_ratio/min": 0.45994558930397034, "sampling/sampling_logp_difference/max": 1.205946922302246, "sampling/sampling_logp_difference/mean": 0.016397034749388695, "step": 607 }, { "clip_ratio/high_max": 0.022549020126461983, "clip_ratio/high_mean": 0.013227726332843304, "clip_ratio/low_mean": 0.012647675350308418, "clip_ratio/low_min": 0.007766990456730127, "clip_ratio/region_mean": 0.025875402614474297, "entropy": 0.39926519989967346, "epoch": 4.164383561643835, "grad_norm": 1.6408632670354197, "kl": 0.7760101556777954, "learning_rate": 3.960616438356164e-07, "loss": 0.0092, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 739.0, "completions/max_terminated_length": 739.0, "completions/mean_length": 117.39286041259766, "completions/mean_terminated_length": 117.39286041259766, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.22391533851623535, "epoch": 4.171232876712328, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.351895279342902, "kl": 1.1515928506851196, "learning_rate": 3.9589041095890407e-07, "loss": 0.0152, "num_tokens": 8734994.0, "reward": 0.6037253141403198, "reward_std": 0.0774848461151123, "rewards/check_gptzero_func/mean": 0.6037253141403198, "rewards/check_gptzero_func/std": 0.3129538893699646, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000401258468628, "sampling/importance_sampling_ratio/min": 0.6438078284263611, "sampling/sampling_logp_difference/max": 0.7252029180526733, "sampling/sampling_logp_difference/mean": 0.01264944951981306, "step": 609 }, { "clip_ratio/high_max": 0.018141835927963257, "clip_ratio/high_mean": 0.013346178457140923, "clip_ratio/low_mean": 0.016187964007258415, "clip_ratio/low_min": 0.009333333000540733, "clip_ratio/region_mean": 0.029534142464399338, "entropy": 0.22190651297569275, "epoch": 4.178082191780822, "grad_norm": 3.1580131543692893, "kl": 1.2636559009552002, "learning_rate": 3.957191780821917e-07, "loss": 0.0079, "step": 610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 900.0, "completions/max_terminated_length": 900.0, "completions/mean_length": 149.6428680419922, "completions/mean_terminated_length": 149.6428680419922, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.30911269783973694, "epoch": 4.184931506849315, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.196785115470032, "kl": 0.9132189154624939, "learning_rate": 3.955479452054795e-07, "loss": 0.0483, "num_tokens": 8748564.0, "reward": 0.5267507433891296, "reward_std": 0.08353448659181595, "rewards/check_gptzero_func/mean": 0.5267507433891296, "rewards/check_gptzero_func/std": 0.3044106364250183, "sampling/importance_sampling_ratio/max": 1.6272691488265991, "sampling/importance_sampling_ratio/mean": 0.9998162984848022, "sampling/importance_sampling_ratio/min": 0.5039598941802979, "sampling/sampling_logp_difference/max": 0.6852586269378662, "sampling/sampling_logp_difference/mean": 0.015218522399663925, "step": 611 }, { "clip_ratio/high_max": 0.019275249913334846, "clip_ratio/high_mean": 0.015120421536266804, "clip_ratio/low_mean": 0.01698940061032772, "clip_ratio/low_min": 0.00944669358432293, "clip_ratio/region_mean": 0.0321098230779171, "entropy": 0.3092518746852875, "epoch": 4.191780821917808, "grad_norm": 1.6838625384637003, "kl": 0.9087098836898804, "learning_rate": 3.953767123287671e-07, "loss": 0.038, "step": 612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 148.48214721679688, "completions/mean_terminated_length": 148.48214721679688, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.3122602105140686, "epoch": 4.198630136986301, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.2985925229395368, "kl": 0.9146066308021545, "learning_rate": 3.952054794520548e-07, "loss": -0.0122, "num_tokens": 8761627.0, "reward": 0.6041033267974854, "reward_std": 0.11189064383506775, "rewards/check_gptzero_func/mean": 0.6041032671928406, "rewards/check_gptzero_func/std": 0.33021971583366394, "sampling/importance_sampling_ratio/max": 1.6180284023284912, "sampling/importance_sampling_ratio/mean": 0.9997270703315735, "sampling/importance_sampling_ratio/min": 0.6182499527931213, "sampling/sampling_logp_difference/max": 0.48120832443237305, "sampling/sampling_logp_difference/mean": 0.015053985640406609, "step": 613 }, { "clip_ratio/high_max": 0.02488231286406517, "clip_ratio/high_mean": 0.015122351236641407, "clip_ratio/low_mean": 0.014240480959415436, "clip_ratio/low_min": 0.004201680887490511, "clip_ratio/region_mean": 0.029362831264734268, "entropy": 0.3127194046974182, "epoch": 4.205479452054795, "grad_norm": 1.9130267103142513, "kl": 0.9225660562515259, "learning_rate": 3.950342465753425e-07, "loss": -0.0219, "step": 614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 231.60714721679688, "completions/mean_terminated_length": 231.60714721679688, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.3265451490879059, "epoch": 4.212328767123288, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.1172277171966303, "kl": 0.8295098543167114, "learning_rate": 3.9486301369863013e-07, "loss": -0.0128, "num_tokens": 8779699.0, "reward": 0.6535198092460632, "reward_std": 0.0879463255405426, "rewards/check_gptzero_func/mean": 0.6535197496414185, "rewards/check_gptzero_func/std": 0.3303973376750946, "sampling/importance_sampling_ratio/max": 1.560412883758545, "sampling/importance_sampling_ratio/mean": 0.9994515180587769, "sampling/importance_sampling_ratio/min": 0.6117782592773438, "sampling/sampling_logp_difference/max": 0.49138545989990234, "sampling/sampling_logp_difference/mean": 0.014291101135313511, "step": 615 }, { "clip_ratio/high_max": 0.016012199223041534, "clip_ratio/high_mean": 0.013153442181646824, "clip_ratio/low_mean": 0.012512288056313992, "clip_ratio/low_min": 0.007805724162608385, "clip_ratio/region_mean": 0.025665730237960815, "entropy": 0.32376521825790405, "epoch": 4.219178082191781, "grad_norm": 2.1050860477097797, "kl": 0.8766402006149292, "learning_rate": 3.946917808219178e-07, "loss": -0.0209, "step": 616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 798.0, "completions/max_terminated_length": 798.0, "completions/mean_length": 131.2857208251953, "completions/mean_terminated_length": 131.2857208251953, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.19182154536247253, "epoch": 4.226027397260274, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.649164260438122, "kl": 0.942517101764679, "learning_rate": 3.9452054794520543e-07, "loss": 0.0105, "num_tokens": 8792151.0, "reward": 0.6547342538833618, "reward_std": 0.08771185576915741, "rewards/check_gptzero_func/mean": 0.654734194278717, "rewards/check_gptzero_func/std": 0.3677164614200592, "sampling/importance_sampling_ratio/max": 1.6220811605453491, "sampling/importance_sampling_ratio/mean": 1.0003254413604736, "sampling/importance_sampling_ratio/min": 0.44629108905792236, "sampling/sampling_logp_difference/max": 0.80678391456604, "sampling/sampling_logp_difference/mean": 0.010611857287585735, "step": 617 }, { "clip_ratio/high_max": 0.01672862470149994, "clip_ratio/high_mean": 0.011689205653965473, "clip_ratio/low_mean": 0.015226549468934536, "clip_ratio/low_min": 0.003992015961557627, "clip_ratio/region_mean": 0.02691575512290001, "entropy": 0.1875544935464859, "epoch": 4.232876712328767, "grad_norm": 3.2723539730528883, "kl": 1.0765511989593506, "learning_rate": 3.9434931506849313e-07, "loss": 0.004, "step": 618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 835.0, "completions/max_terminated_length": 835.0, "completions/mean_length": 139.5178680419922, "completions/mean_terminated_length": 139.5178680419922, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.26600155234336853, "epoch": 4.239726027397261, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.2951128323976953, "kl": 0.9739308953285217, "learning_rate": 3.941780821917808e-07, "loss": -0.0252, "num_tokens": 8804314.0, "reward": 0.5033829212188721, "reward_std": 0.11966950446367264, "rewards/check_gptzero_func/mean": 0.5033828616142273, "rewards/check_gptzero_func/std": 0.3552757501602173, "sampling/importance_sampling_ratio/max": 1.650652527809143, "sampling/importance_sampling_ratio/mean": 0.9992604851722717, "sampling/importance_sampling_ratio/min": 0.548938512802124, "sampling/sampling_logp_difference/max": 0.599768877029419, "sampling/sampling_logp_difference/mean": 0.014853657223284245, "step": 619 }, { "clip_ratio/high_max": 0.02517162449657917, "clip_ratio/high_mean": 0.019363855943083763, "clip_ratio/low_mean": 0.017016630619764328, "clip_ratio/low_min": 0.010297482833266258, "clip_ratio/region_mean": 0.03638048842549324, "entropy": 0.2677595615386963, "epoch": 4.2465753424657535, "grad_norm": 2.4056576924039463, "kl": 0.9517071843147278, "learning_rate": 3.940068493150685e-07, "loss": -0.0342, "step": 620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1068.0, "completions/max_terminated_length": 1068.0, "completions/mean_length": 127.21429443359375, "completions/mean_terminated_length": 127.21429443359375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.19725243747234344, "epoch": 4.2534246575342465, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.3315143812776022, "kl": 1.123866081237793, "learning_rate": 3.938356164383562e-07, "loss": 0.0076, "num_tokens": 8817012.0, "reward": 0.6642140746116638, "reward_std": 0.10195234417915344, "rewards/check_gptzero_func/mean": 0.6642140746116638, "rewards/check_gptzero_func/std": 0.3392430245876312, "sampling/importance_sampling_ratio/max": 1.725131630897522, "sampling/importance_sampling_ratio/mean": 1.0000941753387451, "sampling/importance_sampling_ratio/min": 0.4740407168865204, "sampling/sampling_logp_difference/max": 0.7464621067047119, "sampling/sampling_logp_difference/mean": 0.012620468623936176, "step": 621 }, { "clip_ratio/high_max": 0.027508091181516647, "clip_ratio/high_mean": 0.016258662566542625, "clip_ratio/low_mean": 0.01683034934103489, "clip_ratio/low_min": 0.007751937955617905, "clip_ratio/region_mean": 0.033089008182287216, "entropy": 0.19543194770812988, "epoch": 4.260273972602739, "grad_norm": 2.049548453349087, "kl": 1.1210832595825195, "learning_rate": 3.9366438356164384e-07, "loss": -0.0022, "step": 622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 691.0, "completions/max_terminated_length": 691.0, "completions/mean_length": 126.37500762939453, "completions/mean_terminated_length": 126.37500762939453, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.3714003264904022, "epoch": 4.267123287671233, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.487953063216922, "kl": 0.9116410613059998, "learning_rate": 3.934931506849315e-07, "loss": -0.005, "num_tokens": 8828471.0, "reward": 0.637628972530365, "reward_std": 0.09365466982126236, "rewards/check_gptzero_func/mean": 0.637628972530365, "rewards/check_gptzero_func/std": 0.3237457871437073, "sampling/importance_sampling_ratio/max": 1.4366209506988525, "sampling/importance_sampling_ratio/mean": 0.9998444318771362, "sampling/importance_sampling_ratio/min": 0.13973867893218994, "sampling/sampling_logp_difference/max": 1.9679811000823975, "sampling/sampling_logp_difference/mean": 0.015387365594506264, "step": 623 }, { "clip_ratio/high_max": 0.026425590738654137, "clip_ratio/high_mean": 0.018460288643836975, "clip_ratio/low_mean": 0.0151884276419878, "clip_ratio/low_min": 0.00804289523512125, "clip_ratio/region_mean": 0.033648718148469925, "entropy": 0.37073057889938354, "epoch": 4.273972602739726, "grad_norm": 1.8966956554167396, "kl": 0.9188264012336731, "learning_rate": 3.9332191780821914e-07, "loss": -0.0158, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 807.0, "completions/max_terminated_length": 807.0, "completions/mean_length": 131.375, "completions/mean_terminated_length": 131.375, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.22631920874118805, "epoch": 4.280821917808219, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.037250321401866, "kl": 0.9580695033073425, "learning_rate": 3.9315068493150684e-07, "loss": -0.0245, "num_tokens": 8840828.0, "reward": 0.6434494256973267, "reward_std": 0.04960473254323006, "rewards/check_gptzero_func/mean": 0.6434494256973267, "rewards/check_gptzero_func/std": 0.34935590624809265, "sampling/importance_sampling_ratio/max": 1.4618169069290161, "sampling/importance_sampling_ratio/mean": 1.000613808631897, "sampling/importance_sampling_ratio/min": 0.5751617550849915, "sampling/sampling_logp_difference/max": 0.5531039237976074, "sampling/sampling_logp_difference/mean": 0.012398441322147846, "step": 625 }, { "clip_ratio/high_max": 0.023019034415483475, "clip_ratio/high_mean": 0.01568385399878025, "clip_ratio/low_mean": 0.011974958702921867, "clip_ratio/low_min": 0.00401606410741806, "clip_ratio/region_mean": 0.02765881083905697, "entropy": 0.2262718379497528, "epoch": 4.287671232876712, "grad_norm": 1.7704493996507007, "kl": 0.9622231721878052, "learning_rate": 3.929794520547945e-07, "loss": -0.0339, "step": 626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 875.0, "completions/max_terminated_length": 875.0, "completions/mean_length": 117.35714721679688, "completions/mean_terminated_length": 117.35714721679688, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.2261694222688675, "epoch": 4.294520547945205, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.2659278908637073, "kl": 1.1185520887374878, "learning_rate": 3.9280821917808215e-07, "loss": 0.0211, "num_tokens": 8852460.0, "reward": 0.6804222464561462, "reward_std": 0.07586143165826797, "rewards/check_gptzero_func/mean": 0.6804222464561462, "rewards/check_gptzero_func/std": 0.3226708471775055, "sampling/importance_sampling_ratio/max": 1.4775094985961914, "sampling/importance_sampling_ratio/mean": 0.9996953010559082, "sampling/importance_sampling_ratio/min": 0.3683105409145355, "sampling/sampling_logp_difference/max": 0.9988288879394531, "sampling/sampling_logp_difference/mean": 0.013150004670023918, "step": 627 }, { "clip_ratio/high_max": 0.030612245202064514, "clip_ratio/high_mean": 0.019071267917752266, "clip_ratio/low_mean": 0.018178189173340797, "clip_ratio/low_min": 0.010440835729241371, "clip_ratio/region_mean": 0.037249453365802765, "entropy": 0.2259281426668167, "epoch": 4.301369863013699, "grad_norm": 4.798267888514274, "kl": 1.3015397787094116, "learning_rate": 3.926369863013698e-07, "loss": 0.0134, "step": 628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 838.0, "completions/max_terminated_length": 838.0, "completions/mean_length": 142.875, "completions/mean_terminated_length": 142.875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.2722335159778595, "epoch": 4.308219178082192, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.693923171493087, "kl": 0.8549391627311707, "learning_rate": 3.9246575342465755e-07, "loss": 0.0235, "num_tokens": 8865523.0, "reward": 0.7065127491950989, "reward_std": 0.06332481652498245, "rewards/check_gptzero_func/mean": 0.7065127491950989, "rewards/check_gptzero_func/std": 0.32122868299484253, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0011112689971924, "sampling/importance_sampling_ratio/min": 0.47559434175491333, "sampling/sampling_logp_difference/max": 0.7431900501251221, "sampling/sampling_logp_difference/mean": 0.014315311796963215, "step": 629 }, { "clip_ratio/high_max": 0.017595307901501656, "clip_ratio/high_mean": 0.01175676379352808, "clip_ratio/low_mean": 0.013404221273958683, "clip_ratio/low_min": 0.0034895313438028097, "clip_ratio/region_mean": 0.025160985067486763, "entropy": 0.2692456841468811, "epoch": 4.315068493150685, "grad_norm": 1.933795434604208, "kl": 0.890365481376648, "learning_rate": 3.922945205479452e-07, "loss": 0.0169, "step": 630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 932.0, "completions/max_terminated_length": 932.0, "completions/mean_length": 181.92857360839844, "completions/mean_terminated_length": 181.92857360839844, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.350252628326416, "epoch": 4.321917808219178, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9547660232982456, "kl": 1.0644925832748413, "learning_rate": 3.9212328767123285e-07, "loss": -0.0189, "num_tokens": 8880353.0, "reward": 0.6650454998016357, "reward_std": 0.1003192737698555, "rewards/check_gptzero_func/mean": 0.665045440196991, "rewards/check_gptzero_func/std": 0.26354190707206726, "sampling/importance_sampling_ratio/max": 1.7520533800125122, "sampling/importance_sampling_ratio/mean": 1.0001065731048584, "sampling/importance_sampling_ratio/min": 0.6071458458900452, "sampling/sampling_logp_difference/max": 0.5607883930206299, "sampling/sampling_logp_difference/mean": 0.017130451276898384, "step": 631 }, { "clip_ratio/high_max": 0.02801724150776863, "clip_ratio/high_mean": 0.01887441985309124, "clip_ratio/low_mean": 0.014271458610892296, "clip_ratio/low_min": 0.008130080997943878, "clip_ratio/region_mean": 0.033145878463983536, "entropy": 0.3508033752441406, "epoch": 4.328767123287671, "grad_norm": 1.6159449164907063, "kl": 1.073303461074829, "learning_rate": 3.9195205479452056e-07, "loss": -0.0291, "step": 632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 732.0, "completions/max_terminated_length": 732.0, "completions/mean_length": 142.32144165039062, "completions/mean_terminated_length": 142.32144165039062, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.27161774039268494, "epoch": 4.335616438356165, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.271591142831869, "kl": 1.034026026725769, "learning_rate": 3.917808219178082e-07, "loss": 0.0425, "num_tokens": 8893397.0, "reward": 0.5336751937866211, "reward_std": 0.11647245287895203, "rewards/check_gptzero_func/mean": 0.5336751341819763, "rewards/check_gptzero_func/std": 0.3026014566421509, "sampling/importance_sampling_ratio/max": 1.5182493925094604, "sampling/importance_sampling_ratio/mean": 0.9999902844429016, "sampling/importance_sampling_ratio/min": 0.4871218800544739, "sampling/sampling_logp_difference/max": 0.7192409038543701, "sampling/sampling_logp_difference/mean": 0.015041567385196686, "step": 633 }, { "clip_ratio/high_max": 0.02131018228828907, "clip_ratio/high_mean": 0.015568592585623264, "clip_ratio/low_mean": 0.020216461271047592, "clip_ratio/low_min": 0.011070110835134983, "clip_ratio/region_mean": 0.03578505292534828, "entropy": 0.27212393283843994, "epoch": 4.342465753424658, "grad_norm": 2.108625012414137, "kl": 1.012105107307434, "learning_rate": 3.9160958904109586e-07, "loss": 0.0353, "step": 634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1166.0, "completions/max_terminated_length": 1166.0, "completions/mean_length": 166.85714721679688, "completions/mean_terminated_length": 166.85714721679688, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.3254454433917999, "epoch": 4.3493150684931505, "frac_reward_zero_std": 0.0, "grad_norm": 2.3260721101816633, "kl": 1.051446557044983, "learning_rate": 3.914383561643835e-07, "loss": 0.008, "num_tokens": 8907941.0, "reward": 0.6034669876098633, "reward_std": 0.10841286927461624, "rewards/check_gptzero_func/mean": 0.6034669876098633, "rewards/check_gptzero_func/std": 0.3325459957122803, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.000272512435913, "sampling/importance_sampling_ratio/min": 0.5879095196723938, "sampling/sampling_logp_difference/max": 0.9442927837371826, "sampling/sampling_logp_difference/mean": 0.016013575717806816, "step": 635 }, { "clip_ratio/high_max": 0.02733006328344345, "clip_ratio/high_mean": 0.018088316544890404, "clip_ratio/low_mean": 0.01721796952188015, "clip_ratio/low_min": 0.012411347590386868, "clip_ratio/region_mean": 0.035306286066770554, "entropy": 0.323632150888443, "epoch": 4.3561643835616435, "grad_norm": 1.9045071025995248, "kl": 1.0602484941482544, "learning_rate": 3.912671232876712e-07, "loss": -0.0026, "step": 636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 902.0, "completions/max_terminated_length": 902.0, "completions/mean_length": 176.32144165039062, "completions/mean_terminated_length": 176.32144165039062, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.3843434453010559, "epoch": 4.363013698630137, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.0498663736764344, "kl": 0.8650074005126953, "learning_rate": 3.9109589041095886e-07, "loss": -0.0303, "num_tokens": 8922655.0, "reward": 0.6947792768478394, "reward_std": 0.017264477908611298, "rewards/check_gptzero_func/mean": 0.6947792768478394, "rewards/check_gptzero_func/std": 0.29185977578163147, "sampling/importance_sampling_ratio/max": 1.5971448421478271, "sampling/importance_sampling_ratio/mean": 0.9998512268066406, "sampling/importance_sampling_ratio/min": 0.5658109784126282, "sampling/sampling_logp_difference/max": 0.5694952011108398, "sampling/sampling_logp_difference/mean": 0.01680566743016243, "step": 637 }, { "clip_ratio/high_max": 0.028205128386616707, "clip_ratio/high_mean": 0.017112450674176216, "clip_ratio/low_mean": 0.013297689147293568, "clip_ratio/low_min": 0.007996002212166786, "clip_ratio/region_mean": 0.03041014075279236, "entropy": 0.3866545259952545, "epoch": 4.36986301369863, "grad_norm": 1.7130496276390543, "kl": 0.8377960920333862, "learning_rate": 3.9092465753424656e-07, "loss": -0.0401, "step": 638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 833.0, "completions/max_terminated_length": 833.0, "completions/mean_length": 113.12500762939453, "completions/mean_terminated_length": 113.12500762939453, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.20652954280376434, "epoch": 4.376712328767123, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.710439955501259, "kl": 1.0329128503799438, "learning_rate": 3.9075342465753427e-07, "loss": -0.0265, "num_tokens": 8933690.0, "reward": 0.7538745403289795, "reward_std": 0.10309935361146927, "rewards/check_gptzero_func/mean": 0.7538744807243347, "rewards/check_gptzero_func/std": 0.2633333206176758, "sampling/importance_sampling_ratio/max": 1.4617420434951782, "sampling/importance_sampling_ratio/mean": 1.000872015953064, "sampling/importance_sampling_ratio/min": 0.6170312762260437, "sampling/sampling_logp_difference/max": 0.4828355312347412, "sampling/sampling_logp_difference/mean": 0.011273923330008984, "step": 639 }, { "clip_ratio/high_max": 0.031093835830688477, "clip_ratio/high_mean": 0.015430399216711521, "clip_ratio/low_mean": 0.01709115318953991, "clip_ratio/low_min": 0.00929368007928133, "clip_ratio/region_mean": 0.03252154961228371, "entropy": 0.20634673535823822, "epoch": 4.383561643835616, "grad_norm": 1.9094514616836, "kl": 1.0238704681396484, "learning_rate": 3.905821917808219e-07, "loss": -0.0364, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 825.0, "completions/max_terminated_length": 825.0, "completions/mean_length": 176.17857360839844, "completions/mean_terminated_length": 176.17857360839844, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.33825185894966125, "epoch": 4.390410958904109, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.422915795337009, "kl": 0.8287263512611389, "learning_rate": 3.9041095890410957e-07, "loss": 0.0214, "num_tokens": 8948520.0, "reward": 0.6109718680381775, "reward_std": 0.07670561969280243, "rewards/check_gptzero_func/mean": 0.6109718680381775, "rewards/check_gptzero_func/std": 0.36024534702301025, "sampling/importance_sampling_ratio/max": 1.826147198677063, "sampling/importance_sampling_ratio/mean": 1.000044345855713, "sampling/importance_sampling_ratio/min": 0.609107255935669, "sampling/sampling_logp_difference/max": 0.6022083759307861, "sampling/sampling_logp_difference/mean": 0.015922434628009796, "step": 641 }, { "clip_ratio/high_max": 0.02576112374663353, "clip_ratio/high_mean": 0.0173344649374485, "clip_ratio/low_mean": 0.011021203361451626, "clip_ratio/low_min": 0.002369668334722519, "clip_ratio/region_mean": 0.02835567109286785, "entropy": 0.3397963345050812, "epoch": 4.397260273972603, "grad_norm": 1.9055582596969158, "kl": 0.712722659111023, "learning_rate": 3.902397260273972e-07, "loss": 0.0107, "step": 642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 764.0, "completions/max_terminated_length": 764.0, "completions/mean_length": 116.96429443359375, "completions/mean_terminated_length": 116.96429443359375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.26834824681282043, "epoch": 4.404109589041096, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.4852541203020277, "kl": 1.1583020687103271, "learning_rate": 3.900684931506849e-07, "loss": -0.0201, "num_tokens": 8959552.0, "reward": 0.522674024105072, "reward_std": 0.04811473563313484, "rewards/check_gptzero_func/mean": 0.522674024105072, "rewards/check_gptzero_func/std": 0.3191637396812439, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.001054048538208, "sampling/importance_sampling_ratio/min": 0.5515109896659851, "sampling/sampling_logp_difference/max": 0.7246332168579102, "sampling/sampling_logp_difference/mean": 0.014864161610603333, "step": 643 }, { "clip_ratio/high_max": 0.033203125, "clip_ratio/high_mean": 0.02238781377673149, "clip_ratio/low_mean": 0.018522383645176888, "clip_ratio/low_min": 0.00827814545482397, "clip_ratio/region_mean": 0.04091019555926323, "entropy": 0.2678822875022888, "epoch": 4.410958904109589, "grad_norm": 2.0896608688700975, "kl": 1.1751797199249268, "learning_rate": 3.8989726027397257e-07, "loss": -0.0314, "step": 644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 836.0, "completions/max_terminated_length": 836.0, "completions/mean_length": 122.60714721679688, "completions/mean_terminated_length": 122.60714721679688, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.2929806113243103, "epoch": 4.417808219178082, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.388823639936817, "kl": 0.8888079524040222, "learning_rate": 3.897260273972602e-07, "loss": -0.0012, "num_tokens": 8971214.0, "reward": 0.6711766719818115, "reward_std": 0.06456080079078674, "rewards/check_gptzero_func/mean": 0.6711766123771667, "rewards/check_gptzero_func/std": 0.28169581294059753, "sampling/importance_sampling_ratio/max": 1.6091113090515137, "sampling/importance_sampling_ratio/mean": 1.0001709461212158, "sampling/importance_sampling_ratio/min": 0.7129236459732056, "sampling/sampling_logp_difference/max": 0.47568202018737793, "sampling/sampling_logp_difference/mean": 0.01372771430760622, "step": 645 }, { "clip_ratio/high_max": 0.028860028833150864, "clip_ratio/high_mean": 0.01378709264099598, "clip_ratio/low_mean": 0.014922111295163631, "clip_ratio/low_min": 0.01011804398149252, "clip_ratio/region_mean": 0.028709203004837036, "entropy": 0.29206418991088867, "epoch": 4.424657534246576, "grad_norm": 1.9005240930866627, "kl": 0.8753501176834106, "learning_rate": 3.895547945205479e-07, "loss": -0.0117, "step": 646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 722.0, "completions/max_terminated_length": 722.0, "completions/mean_length": 106.16072082519531, "completions/mean_terminated_length": 106.16072082519531, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.2026563584804535, "epoch": 4.431506849315069, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.999146627578954, "kl": 1.23471200466156, "learning_rate": 3.8938356164383563e-07, "loss": 0.0134, "num_tokens": 8981935.0, "reward": 0.6032712459564209, "reward_std": 0.11830037832260132, "rewards/check_gptzero_func/mean": 0.6032711863517761, "rewards/check_gptzero_func/std": 0.35289266705513, "sampling/importance_sampling_ratio/max": 1.629333257675171, "sampling/importance_sampling_ratio/mean": 0.9993212819099426, "sampling/importance_sampling_ratio/min": 0.6262168884277344, "sampling/sampling_logp_difference/max": 0.488170862197876, "sampling/sampling_logp_difference/mean": 0.012732559815049171, "step": 647 }, { "clip_ratio/high_max": 0.026740238070487976, "clip_ratio/high_mean": 0.016523156315088272, "clip_ratio/low_mean": 0.01697821542620659, "clip_ratio/low_min": 0.006339144427329302, "clip_ratio/region_mean": 0.03350137546658516, "entropy": 0.20574426651000977, "epoch": 4.438356164383562, "grad_norm": 2.2023479162350688, "kl": 1.1474545001983643, "learning_rate": 3.892123287671233e-07, "loss": 0.0035, "step": 648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 175.21429443359375, "completions/mean_terminated_length": 175.21429443359375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.34642642736434937, "epoch": 4.445205479452055, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.2562611138161093, "kl": 0.9177597165107727, "learning_rate": 3.8904109589041093e-07, "loss": -0.0047, "num_tokens": 8996123.0, "reward": 0.5488199591636658, "reward_std": 0.10028939694166183, "rewards/check_gptzero_func/mean": 0.5488199591636658, "rewards/check_gptzero_func/std": 0.3769046664237976, "sampling/importance_sampling_ratio/max": 1.9046767950057983, "sampling/importance_sampling_ratio/mean": 0.9997296929359436, "sampling/importance_sampling_ratio/min": 0.6119533181190491, "sampling/sampling_logp_difference/max": 0.6443123817443848, "sampling/sampling_logp_difference/mean": 0.017559409141540527, "step": 649 }, { "clip_ratio/high_max": 0.024853801354765892, "clip_ratio/high_mean": 0.017264259979128838, "clip_ratio/low_mean": 0.01682942546904087, "clip_ratio/low_min": 0.010564358904957771, "clip_ratio/region_mean": 0.03409368544816971, "entropy": 0.3461153507232666, "epoch": 4.4520547945205475, "grad_norm": 2.3777245114501038, "kl": 1.0098634958267212, "learning_rate": 3.8886986301369863e-07, "loss": -0.0146, "step": 650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 985.0, "completions/max_terminated_length": 985.0, "completions/mean_length": 162.08929443359375, "completions/mean_terminated_length": 162.08929443359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.2988508343696594, "epoch": 4.458904109589041, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.0785687333527485, "kl": 0.8521581888198853, "learning_rate": 3.886986301369863e-07, "loss": 0.0102, "num_tokens": 9010154.0, "reward": 0.6803397536277771, "reward_std": 0.11951295286417007, "rewards/check_gptzero_func/mean": 0.6803397536277771, "rewards/check_gptzero_func/std": 0.297585666179657, "sampling/importance_sampling_ratio/max": 1.5827263593673706, "sampling/importance_sampling_ratio/mean": 1.0004642009735107, "sampling/importance_sampling_ratio/min": 0.48129209876060486, "sampling/sampling_logp_difference/max": 0.7312809824943542, "sampling/sampling_logp_difference/mean": 0.014213763177394867, "step": 651 }, { "clip_ratio/high_max": 0.02221018448472023, "clip_ratio/high_mean": 0.013112385757267475, "clip_ratio/low_mean": 0.015015487559139729, "clip_ratio/low_min": 0.010638297535479069, "clip_ratio/region_mean": 0.028127869591116905, "entropy": 0.29736635088920593, "epoch": 4.465753424657534, "grad_norm": 1.6148470350519608, "kl": 0.8564802408218384, "learning_rate": 3.8852739726027393e-07, "loss": -0.0005, "step": 652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 936.0, "completions/max_terminated_length": 936.0, "completions/mean_length": 184.37501525878906, "completions/mean_terminated_length": 184.37501525878906, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.415770560503006, "epoch": 4.472602739726027, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.13719693232277, "kl": 1.0160434246063232, "learning_rate": 3.8835616438356164e-07, "loss": 0.0313, "num_tokens": 9025293.0, "reward": 0.5345319509506226, "reward_std": 0.09060639888048172, "rewards/check_gptzero_func/mean": 0.5345319509506226, "rewards/check_gptzero_func/std": 0.28864291310310364, "sampling/importance_sampling_ratio/max": 1.8055168390274048, "sampling/importance_sampling_ratio/mean": 1.00021231174469, "sampling/importance_sampling_ratio/min": 0.6157156229019165, "sampling/sampling_logp_difference/max": 0.5908468961715698, "sampling/sampling_logp_difference/mean": 0.018574148416519165, "step": 653 }, { "clip_ratio/high_max": 0.020654045045375824, "clip_ratio/high_mean": 0.016424188390374184, "clip_ratio/low_mean": 0.016985436901450157, "clip_ratio/low_min": 0.011270491406321526, "clip_ratio/region_mean": 0.03340962529182434, "entropy": 0.4147346317768097, "epoch": 4.47945205479452, "grad_norm": 2.0662723454455505, "kl": 1.0568493604660034, "learning_rate": 3.881849315068493e-07, "loss": 0.0204, "step": 654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 859.0, "completions/max_terminated_length": 859.0, "completions/mean_length": 140.83929443359375, "completions/mean_terminated_length": 140.83929443359375, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.3416009843349457, "epoch": 4.486301369863014, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.2926224194249434, "kl": 1.0027762651443481, "learning_rate": 3.8801369863013694e-07, "loss": -0.0013, "num_tokens": 9037836.0, "reward": 0.5940163731575012, "reward_std": 0.086007259786129, "rewards/check_gptzero_func/mean": 0.5940163731575012, "rewards/check_gptzero_func/std": 0.3175897002220154, "sampling/importance_sampling_ratio/max": 1.5514122247695923, "sampling/importance_sampling_ratio/mean": 0.9991640448570251, "sampling/importance_sampling_ratio/min": 0.6156715750694275, "sampling/sampling_logp_difference/max": 0.48504161834716797, "sampling/sampling_logp_difference/mean": 0.01549508422613144, "step": 655 }, { "clip_ratio/high_max": 0.026284348219633102, "clip_ratio/high_mean": 0.016345342621207237, "clip_ratio/low_mean": 0.014628792181611061, "clip_ratio/low_min": 0.004166666883975267, "clip_ratio/region_mean": 0.030974136665463448, "entropy": 0.3391432762145996, "epoch": 4.493150684931507, "grad_norm": 1.8352841394314239, "kl": 1.0195060968399048, "learning_rate": 3.8784246575342464e-07, "loss": -0.0124, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 152.17857360839844, "completions/mean_terminated_length": 152.17857360839844, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.37555381655693054, "epoch": 4.5, "frac_reward_zero_std": 0.0, "grad_norm": 2.812685379570107, "kl": 1.138951301574707, "learning_rate": 3.8767123287671235e-07, "loss": 0.0121, "num_tokens": 9051248.0, "reward": 0.508371114730835, "reward_std": 0.07386896014213562, "rewards/check_gptzero_func/mean": 0.508371114730835, "rewards/check_gptzero_func/std": 0.3414282202720642, "sampling/importance_sampling_ratio/max": 1.7046136856079102, "sampling/importance_sampling_ratio/mean": 0.9995776414871216, "sampling/importance_sampling_ratio/min": 0.6379302740097046, "sampling/sampling_logp_difference/max": 0.5333385467529297, "sampling/sampling_logp_difference/mean": 0.016184281557798386, "step": 657 }, { "clip_ratio/high_max": 0.02304147556424141, "clip_ratio/high_mean": 0.0180036723613739, "clip_ratio/low_mean": 0.015921946614980698, "clip_ratio/low_min": 0.014011799357831478, "clip_ratio/region_mean": 0.0339256189763546, "entropy": 0.37386420369148254, "epoch": 4.506849315068493, "grad_norm": 2.276399545602087, "kl": 0.947195827960968, "learning_rate": 3.875e-07, "loss": 0.0007, "step": 658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 937.0, "completions/max_terminated_length": 937.0, "completions/mean_length": 233.2678680419922, "completions/mean_terminated_length": 233.2678680419922, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.4299100339412689, "epoch": 4.513698630136986, "frac_reward_zero_std": 0.0, "grad_norm": 1.7985089262273537, "kl": 0.7209532856941223, "learning_rate": 3.8732876712328765e-07, "loss": 0.0083, "num_tokens": 9068943.0, "reward": 0.5439475178718567, "reward_std": 0.10493887215852737, "rewards/check_gptzero_func/mean": 0.5439475178718567, "rewards/check_gptzero_func/std": 0.21257567405700684, "sampling/importance_sampling_ratio/max": 1.6045156717300415, "sampling/importance_sampling_ratio/mean": 0.9983581900596619, "sampling/importance_sampling_ratio/min": 0.5558361411094666, "sampling/sampling_logp_difference/max": 0.5872817039489746, "sampling/sampling_logp_difference/mean": 0.019271600991487503, "step": 659 }, { "clip_ratio/high_max": 0.022708840668201447, "clip_ratio/high_mean": 0.016908908262848854, "clip_ratio/low_mean": 0.011367655359208584, "clip_ratio/low_min": 0.007974481210112572, "clip_ratio/region_mean": 0.028276562690734863, "entropy": 0.4295699894428253, "epoch": 4.52054794520548, "grad_norm": 1.5308859192966338, "kl": 0.7154765129089355, "learning_rate": 3.8715753424657535e-07, "loss": -0.0018, "step": 660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 724.0, "completions/max_terminated_length": 724.0, "completions/mean_length": 175.6607208251953, "completions/mean_terminated_length": 175.6607208251953, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.3483031392097473, "epoch": 4.527397260273973, "frac_reward_zero_std": 0.0, "grad_norm": 1.9700768143796081, "kl": 0.7719845771789551, "learning_rate": 3.86986301369863e-07, "loss": -0.0222, "num_tokens": 9084124.0, "reward": 0.663615882396698, "reward_std": 0.10014528781175613, "rewards/check_gptzero_func/mean": 0.6636158227920532, "rewards/check_gptzero_func/std": 0.28507375717163086, "sampling/importance_sampling_ratio/max": 1.556196689605713, "sampling/importance_sampling_ratio/mean": 1.0003103017807007, "sampling/importance_sampling_ratio/min": 0.3858293294906616, "sampling/sampling_logp_difference/max": 0.9523601531982422, "sampling/sampling_logp_difference/mean": 0.016146982088685036, "step": 661 }, { "clip_ratio/high_max": 0.022058824077248573, "clip_ratio/high_mean": 0.015930162742733955, "clip_ratio/low_mean": 0.013676644302904606, "clip_ratio/low_min": 0.0061562140472233295, "clip_ratio/region_mean": 0.029606806114315987, "entropy": 0.34889641404151917, "epoch": 4.534246575342466, "grad_norm": 1.4999854313636116, "kl": 0.7837495803833008, "learning_rate": 3.8681506849315065e-07, "loss": -0.0316, "step": 662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1045.0, "completions/max_terminated_length": 1045.0, "completions/mean_length": 208.32144165039062, "completions/mean_terminated_length": 208.32144165039062, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.4167136251926422, "epoch": 4.541095890410959, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.946975860963083, "kl": 0.7994295358657837, "learning_rate": 3.866438356164383e-07, "loss": 0.03, "num_tokens": 9100988.0, "reward": 0.483702689409256, "reward_std": 0.0738329291343689, "rewards/check_gptzero_func/mean": 0.4837026298046112, "rewards/check_gptzero_func/std": 0.30932897329330444, "sampling/importance_sampling_ratio/max": 1.576485276222229, "sampling/importance_sampling_ratio/mean": 1.0005877017974854, "sampling/importance_sampling_ratio/min": 0.5277193784713745, "sampling/sampling_logp_difference/max": 0.639190673828125, "sampling/sampling_logp_difference/mean": 0.01791203022003174, "step": 663 }, { "clip_ratio/high_max": 0.016981132328510284, "clip_ratio/high_mean": 0.013709009625017643, "clip_ratio/low_mean": 0.013838080689311028, "clip_ratio/low_min": 0.010101010091602802, "clip_ratio/region_mean": 0.027547091245651245, "entropy": 0.41607800126075745, "epoch": 4.5479452054794525, "grad_norm": 1.6527299204824355, "kl": 0.8005986213684082, "learning_rate": 3.86472602739726e-07, "loss": 0.0198, "step": 664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 147.6607208251953, "completions/mean_terminated_length": 147.6607208251953, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.27653375267982483, "epoch": 4.554794520547945, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.979974532451743, "kl": 0.7213494181632996, "learning_rate": 3.863013698630137e-07, "loss": -0.0041, "num_tokens": 9114467.0, "reward": 0.6106540560722351, "reward_std": 0.043032124638557434, "rewards/check_gptzero_func/mean": 0.6106539964675903, "rewards/check_gptzero_func/std": 0.32801780104637146, "sampling/importance_sampling_ratio/max": 1.4024578332901, "sampling/importance_sampling_ratio/mean": 1.0002338886260986, "sampling/importance_sampling_ratio/min": 0.6781557202339172, "sampling/sampling_logp_difference/max": 0.388378381729126, "sampling/sampling_logp_difference/mean": 0.011620687320828438, "step": 665 }, { "clip_ratio/high_max": 0.029288703575730324, "clip_ratio/high_mean": 0.01440689992159605, "clip_ratio/low_mean": 0.010966973379254341, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.025373876094818115, "entropy": 0.2777034342288971, "epoch": 4.561643835616438, "grad_norm": 1.6016922854454976, "kl": 0.719801127910614, "learning_rate": 3.8613013698630136e-07, "loss": -0.0138, "step": 666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 891.0, "completions/max_terminated_length": 891.0, "completions/mean_length": 123.78572082519531, "completions/mean_terminated_length": 123.78572082519531, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.2550201416015625, "epoch": 4.568493150684931, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.4833690907813346, "kl": 0.9037975072860718, "learning_rate": 3.85958904109589e-07, "loss": -0.0021, "num_tokens": 9126705.0, "reward": 0.5884521007537842, "reward_std": 0.06575661152601242, "rewards/check_gptzero_func/mean": 0.5884520411491394, "rewards/check_gptzero_func/std": 0.32686200737953186, "sampling/importance_sampling_ratio/max": 1.724931240081787, "sampling/importance_sampling_ratio/mean": 1.0001493692398071, "sampling/importance_sampling_ratio/min": 0.6266142725944519, "sampling/sampling_logp_difference/max": 0.54518723487854, "sampling/sampling_logp_difference/mean": 0.01368150394409895, "step": 667 }, { "clip_ratio/high_max": 0.021650878712534904, "clip_ratio/high_mean": 0.017859339714050293, "clip_ratio/low_mean": 0.01649324968457222, "clip_ratio/low_min": 0.0033632286358624697, "clip_ratio/region_mean": 0.03435258939862251, "entropy": 0.2583900988101959, "epoch": 4.575342465753424, "grad_norm": 5.067065803731262, "kl": 0.8242195844650269, "learning_rate": 3.857876712328767e-07, "loss": -0.0083, "step": 668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 669.0, "completions/max_terminated_length": 669.0, "completions/mean_length": 132.69644165039062, "completions/mean_terminated_length": 132.69644165039062, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.28814569115638733, "epoch": 4.582191780821918, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.3819182798413157, "kl": 0.979517936706543, "learning_rate": 3.8561643835616436e-07, "loss": 0.0037, "num_tokens": 9139338.0, "reward": 0.5235966444015503, "reward_std": 0.07002706080675125, "rewards/check_gptzero_func/mean": 0.5235966444015503, "rewards/check_gptzero_func/std": 0.34857088327407837, "sampling/importance_sampling_ratio/max": 1.630670428276062, "sampling/importance_sampling_ratio/mean": 0.9996920824050903, "sampling/importance_sampling_ratio/min": 0.6158264875411987, "sampling/sampling_logp_difference/max": 0.48899126052856445, "sampling/sampling_logp_difference/mean": 0.015564980916678905, "step": 669 }, { "clip_ratio/high_max": 0.03219512104988098, "clip_ratio/high_mean": 0.019955184310674667, "clip_ratio/low_mean": 0.01758543960750103, "clip_ratio/low_min": 0.0064850845374166965, "clip_ratio/region_mean": 0.03754062205553055, "entropy": 0.2877114713191986, "epoch": 4.589041095890411, "grad_norm": 2.0283083003071685, "kl": 0.976472795009613, "learning_rate": 3.85445205479452e-07, "loss": -0.0052, "step": 670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1892.0, "completions/max_terminated_length": 1892.0, "completions/mean_length": 236.83929443359375, "completions/mean_terminated_length": 236.83929443359375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.4079037308692932, "epoch": 4.595890410958904, "frac_reward_zero_std": 0.0, "grad_norm": 1.781294732161209, "kl": 0.7534908652305603, "learning_rate": 3.852739726027397e-07, "loss": -0.0011, "num_tokens": 9158325.0, "reward": 0.587024450302124, "reward_std": 0.07247471064329147, "rewards/check_gptzero_func/mean": 0.5870243906974792, "rewards/check_gptzero_func/std": 0.35968509316444397, "sampling/importance_sampling_ratio/max": 1.9876788854599, "sampling/importance_sampling_ratio/mean": 1.000842809677124, "sampling/importance_sampling_ratio/min": 0.4902222752571106, "sampling/sampling_logp_difference/max": 0.7128963470458984, "sampling/sampling_logp_difference/mean": 0.016729334369301796, "step": 671 }, { "clip_ratio/high_max": 0.019308943301439285, "clip_ratio/high_mean": 0.013547603972256184, "clip_ratio/low_mean": 0.011070736683905125, "clip_ratio/low_min": 0.0037359900306910276, "clip_ratio/region_mean": 0.02461834065616131, "entropy": 0.40798088908195496, "epoch": 4.602739726027397, "grad_norm": 1.4930469251544267, "kl": 0.7736772894859314, "learning_rate": 3.8510273972602737e-07, "loss": -0.0104, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 963.0, "completions/max_terminated_length": 963.0, "completions/mean_length": 214.80357360839844, "completions/mean_terminated_length": 214.80357360839844, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.3517705500125885, "epoch": 4.609589041095891, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.1189080015875645, "kl": 0.8441285490989685, "learning_rate": 3.84931506849315e-07, "loss": 0.0096, "num_tokens": 9175380.0, "reward": 0.596084475517273, "reward_std": 0.09771735966205597, "rewards/check_gptzero_func/mean": 0.596084475517273, "rewards/check_gptzero_func/std": 0.3037835955619812, "sampling/importance_sampling_ratio/max": 1.8195816278457642, "sampling/importance_sampling_ratio/mean": 0.9992167353630066, "sampling/importance_sampling_ratio/min": 0.4870380461215973, "sampling/sampling_logp_difference/max": 0.7194130420684814, "sampling/sampling_logp_difference/mean": 0.017461363226175308, "step": 673 }, { "clip_ratio/high_max": 0.021052632480859756, "clip_ratio/high_mean": 0.016059624031186104, "clip_ratio/low_mean": 0.01139076892286539, "clip_ratio/low_min": 0.00804289523512125, "clip_ratio/region_mean": 0.02745039388537407, "entropy": 0.3542877733707428, "epoch": 4.616438356164384, "grad_norm": 1.5858150190137112, "kl": 0.8439086079597473, "learning_rate": 3.847602739726027e-07, "loss": -0.0004, "step": 674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1029.0, "completions/max_terminated_length": 1029.0, "completions/mean_length": 141.48214721679688, "completions/mean_terminated_length": 141.48214721679688, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.3130929172039032, "epoch": 4.623287671232877, "frac_reward_zero_std": 0.0, "grad_norm": 2.378552360365706, "kl": 0.9639342427253723, "learning_rate": 3.845890410958904e-07, "loss": 0.0142, "num_tokens": 9187867.0, "reward": 0.5033004879951477, "reward_std": 0.12423477321863174, "rewards/check_gptzero_func/mean": 0.5033004879951477, "rewards/check_gptzero_func/std": 0.3214437961578369, "sampling/importance_sampling_ratio/max": 1.6598752737045288, "sampling/importance_sampling_ratio/mean": 1.0005439519882202, "sampling/importance_sampling_ratio/min": 0.617139458656311, "sampling/sampling_logp_difference/max": 0.5067424774169922, "sampling/sampling_logp_difference/mean": 0.01670224778354168, "step": 675 }, { "clip_ratio/high_max": 0.02568807266652584, "clip_ratio/high_mean": 0.018627239391207695, "clip_ratio/low_mean": 0.016483407467603683, "clip_ratio/low_min": 0.008955223485827446, "clip_ratio/region_mean": 0.03511064499616623, "entropy": 0.31701141595840454, "epoch": 4.63013698630137, "grad_norm": 2.009208314306859, "kl": 0.9534974694252014, "learning_rate": 3.8441780821917807e-07, "loss": 0.0029, "step": 676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 166.42857360839844, "completions/mean_terminated_length": 166.42857360839844, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.3545680642127991, "epoch": 4.636986301369863, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.025190653706932, "kl": 0.9443671107292175, "learning_rate": 3.842465753424657e-07, "loss": 0.0271, "num_tokens": 9201775.0, "reward": 0.5668306350708008, "reward_std": 0.07156083732843399, "rewards/check_gptzero_func/mean": 0.5668306946754456, "rewards/check_gptzero_func/std": 0.31242820620536804, "sampling/importance_sampling_ratio/max": 1.5223071575164795, "sampling/importance_sampling_ratio/mean": 1.0004792213439941, "sampling/importance_sampling_ratio/min": 0.6469994783401489, "sampling/sampling_logp_difference/max": 0.4354097843170166, "sampling/sampling_logp_difference/mean": 0.016654493287205696, "step": 677 }, { "clip_ratio/high_max": 0.031073445454239845, "clip_ratio/high_mean": 0.01679352857172489, "clip_ratio/low_mean": 0.014088473282754421, "clip_ratio/low_min": 0.007365439087152481, "clip_ratio/region_mean": 0.03088200092315674, "entropy": 0.3539706766605377, "epoch": 4.6438356164383565, "grad_norm": 1.5167872642161093, "kl": 0.9384012818336487, "learning_rate": 3.8407534246575343e-07, "loss": 0.0171, "step": 678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/max_terminated_length": 265.0, "completions/mean_length": 93.6964340209961, "completions/mean_terminated_length": 93.6964340209961, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.24409827589988708, "epoch": 4.6506849315068495, "frac_reward_zero_std": 0.0, "grad_norm": 2.9371592817073515, "kl": 1.1724783182144165, "learning_rate": 3.839041095890411e-07, "loss": 0.0054, "num_tokens": 9211712.0, "reward": 0.5960971713066101, "reward_std": 0.07856356352567673, "rewards/check_gptzero_func/mean": 0.5960971117019653, "rewards/check_gptzero_func/std": 0.2815644443035126, "sampling/importance_sampling_ratio/max": 1.6510783433914185, "sampling/importance_sampling_ratio/mean": 0.9996419548988342, "sampling/importance_sampling_ratio/min": 0.29451292753219604, "sampling/sampling_logp_difference/max": 1.2224323749542236, "sampling/sampling_logp_difference/mean": 0.015288060531020164, "step": 679 }, { "clip_ratio/high_max": 0.029729729518294334, "clip_ratio/high_mean": 0.023034943267703056, "clip_ratio/low_mean": 0.02441486157476902, "clip_ratio/low_min": 0.010920437052845955, "clip_ratio/region_mean": 0.04744980111718178, "entropy": 0.2501088082790375, "epoch": 4.657534246575342, "grad_norm": 2.8876100180077815, "kl": 1.152504563331604, "learning_rate": 3.8373287671232873e-07, "loss": -0.0013, "step": 680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 666.0, "completions/max_terminated_length": 666.0, "completions/mean_length": 143.23214721679688, "completions/mean_terminated_length": 143.23214721679688, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.30072757601737976, "epoch": 4.664383561643835, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.3550275395653073, "kl": 0.9402450919151306, "learning_rate": 3.835616438356164e-07, "loss": -0.0326, "num_tokens": 9224991.0, "reward": 0.5499899387359619, "reward_std": 0.12224128097295761, "rewards/check_gptzero_func/mean": 0.5499899387359619, "rewards/check_gptzero_func/std": 0.31510841846466064, "sampling/importance_sampling_ratio/max": 1.5446823835372925, "sampling/importance_sampling_ratio/mean": 0.9995193481445312, "sampling/importance_sampling_ratio/min": 0.5161824226379395, "sampling/sampling_logp_difference/max": 0.6612950563430786, "sampling/sampling_logp_difference/mean": 0.015714988112449646, "step": 681 }, { "clip_ratio/high_max": 0.027233116328716278, "clip_ratio/high_mean": 0.01868155412375927, "clip_ratio/low_mean": 0.01653445139527321, "clip_ratio/low_min": 0.006657789461314678, "clip_ratio/region_mean": 0.03521600738167763, "entropy": 0.30408892035484314, "epoch": 4.671232876712329, "grad_norm": 3.4745686487281873, "kl": 0.9350519180297852, "learning_rate": 3.833904109589041e-07, "loss": -0.0412, "step": 682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 799.0, "completions/max_terminated_length": 799.0, "completions/mean_length": 152.05357360839844, "completions/mean_terminated_length": 152.05357360839844, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.32831472158432007, "epoch": 4.678082191780822, "frac_reward_zero_std": 0.0, "grad_norm": 2.2079918182891753, "kl": 1.1129077672958374, "learning_rate": 3.832191780821918e-07, "loss": 0.0063, "num_tokens": 9238170.0, "reward": 0.5265172719955444, "reward_std": 0.12221936136484146, "rewards/check_gptzero_func/mean": 0.5265172719955444, "rewards/check_gptzero_func/std": 0.27849310636520386, "sampling/importance_sampling_ratio/max": 1.551880955696106, "sampling/importance_sampling_ratio/mean": 1.0002127885818481, "sampling/importance_sampling_ratio/min": 0.563404381275177, "sampling/sampling_logp_difference/max": 0.5737576484680176, "sampling/sampling_logp_difference/mean": 0.017554080113768578, "step": 683 }, { "clip_ratio/high_max": 0.025940338149666786, "clip_ratio/high_mean": 0.017264895141124725, "clip_ratio/low_mean": 0.016429927200078964, "clip_ratio/low_min": 0.012698412872850895, "clip_ratio/region_mean": 0.03369482234120369, "entropy": 0.32922664284706116, "epoch": 4.684931506849315, "grad_norm": 1.719356480677355, "kl": 1.1086177825927734, "learning_rate": 3.8304794520547944e-07, "loss": -0.0047, "step": 684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 666.0, "completions/max_terminated_length": 666.0, "completions/mean_length": 125.75000762939453, "completions/mean_terminated_length": 125.75000762939453, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.31717827916145325, "epoch": 4.691780821917808, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.532699617883894, "kl": 1.1980485916137695, "learning_rate": 3.8287671232876714e-07, "loss": 0.0289, "num_tokens": 9250006.0, "reward": 0.5845308303833008, "reward_std": 0.08487344533205032, "rewards/check_gptzero_func/mean": 0.584530770778656, "rewards/check_gptzero_func/std": 0.3478301167488098, "sampling/importance_sampling_ratio/max": 1.5311574935913086, "sampling/importance_sampling_ratio/mean": 0.9997502565383911, "sampling/importance_sampling_ratio/min": 0.6093573570251465, "sampling/sampling_logp_difference/max": 0.49535036087036133, "sampling/sampling_logp_difference/mean": 0.016130268573760986, "step": 685 }, { "clip_ratio/high_max": 0.03236246109008789, "clip_ratio/high_mean": 0.02158961072564125, "clip_ratio/low_mean": 0.023119714111089706, "clip_ratio/low_min": 0.004279600456357002, "clip_ratio/region_mean": 0.04470932483673096, "entropy": 0.31283849477767944, "epoch": 4.698630136986301, "grad_norm": 2.7089959966322747, "kl": 1.2240867614746094, "learning_rate": 3.827054794520548e-07, "loss": 0.0178, "step": 686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 940.0, "completions/max_terminated_length": 940.0, "completions/mean_length": 106.62500762939453, "completions/mean_terminated_length": 106.62500762939453, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.21972009539604187, "epoch": 4.705479452054795, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.181651169758013, "kl": 1.0122336149215698, "learning_rate": 3.8253424657534244e-07, "loss": 0.0012, "num_tokens": 9260671.0, "reward": 0.7048289775848389, "reward_std": 0.06468518823385239, "rewards/check_gptzero_func/mean": 0.7048289179801941, "rewards/check_gptzero_func/std": 0.31643256545066833, "sampling/importance_sampling_ratio/max": 1.5470993518829346, "sampling/importance_sampling_ratio/mean": 0.9992989897727966, "sampling/importance_sampling_ratio/min": 0.5489896535873413, "sampling/sampling_logp_difference/max": 0.5996756553649902, "sampling/sampling_logp_difference/mean": 0.012692277319729328, "step": 687 }, { "clip_ratio/high_max": 0.029077118262648582, "clip_ratio/high_mean": 0.020398736000061035, "clip_ratio/low_mean": 0.015513447113335133, "clip_ratio/low_min": 0.009389671497046947, "clip_ratio/region_mean": 0.03591218218207359, "entropy": 0.21909229457378387, "epoch": 4.712328767123288, "grad_norm": 1.7063568805503069, "kl": 0.9975146055221558, "learning_rate": 3.823630136986301e-07, "loss": -0.0093, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 822.0, "completions/max_terminated_length": 822.0, "completions/mean_length": 191.0178680419922, "completions/mean_terminated_length": 191.0178680419922, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.39584508538246155, "epoch": 4.719178082191781, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.275313703618283, "kl": 1.0517598390579224, "learning_rate": 3.821917808219178e-07, "loss": -0.0004, "num_tokens": 9276272.0, "reward": 0.5359244346618652, "reward_std": 0.08160483092069626, "rewards/check_gptzero_func/mean": 0.5359244346618652, "rewards/check_gptzero_func/std": 0.26209867000579834, "sampling/importance_sampling_ratio/max": 1.616037368774414, "sampling/importance_sampling_ratio/mean": 0.99937903881073, "sampling/importance_sampling_ratio/min": 0.5693562626838684, "sampling/sampling_logp_difference/max": 0.563248872756958, "sampling/sampling_logp_difference/mean": 0.01914963871240616, "step": 689 }, { "clip_ratio/high_max": 0.025470653548836708, "clip_ratio/high_mean": 0.015654372051358223, "clip_ratio/low_mean": 0.011008151806890965, "clip_ratio/low_min": 0.006295907776802778, "clip_ratio/region_mean": 0.026662524789571762, "entropy": 0.3985266089439392, "epoch": 4.726027397260274, "grad_norm": 13.67508021490518, "kl": 0.8906562924385071, "learning_rate": 3.8202054794520544e-07, "loss": -0.0043, "step": 690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 174.80357360839844, "completions/mean_terminated_length": 174.80357360839844, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.36460986733436584, "epoch": 4.732876712328768, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.0173511064373013, "kl": 0.8664940595626831, "learning_rate": 3.8184931506849315e-07, "loss": -0.0037, "num_tokens": 9290813.0, "reward": 0.5269370675086975, "reward_std": 0.10145396739244461, "rewards/check_gptzero_func/mean": 0.5269370675086975, "rewards/check_gptzero_func/std": 0.35500237345695496, "sampling/importance_sampling_ratio/max": 1.5738112926483154, "sampling/importance_sampling_ratio/mean": 0.9987481236457825, "sampling/importance_sampling_ratio/min": 0.49824392795562744, "sampling/sampling_logp_difference/max": 0.6966655254364014, "sampling/sampling_logp_difference/mean": 0.015977464616298676, "step": 691 }, { "clip_ratio/high_max": 0.021868787705898285, "clip_ratio/high_mean": 0.01386005338281393, "clip_ratio/low_mean": 0.01241552084684372, "clip_ratio/low_min": 0.006963788531720638, "clip_ratio/region_mean": 0.026275575160980225, "entropy": 0.3654852509498596, "epoch": 4.739726027397261, "grad_norm": 1.6824597330587563, "kl": 0.8634970784187317, "learning_rate": 3.8167808219178085e-07, "loss": -0.0143, "step": 692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 915.0, "completions/max_terminated_length": 915.0, "completions/mean_length": 164.82144165039062, "completions/mean_terminated_length": 164.82144165039062, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.3071022629737854, "epoch": 4.7465753424657535, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.101694106635632, "kl": 0.9118372797966003, "learning_rate": 3.815068493150685e-07, "loss": 0.0164, "num_tokens": 9305115.0, "reward": 0.6702442765235901, "reward_std": 0.06966010481119156, "rewards/check_gptzero_func/mean": 0.6702442169189453, "rewards/check_gptzero_func/std": 0.2816430628299713, "sampling/importance_sampling_ratio/max": 1.6104484796524048, "sampling/importance_sampling_ratio/mean": 1.000285267829895, "sampling/importance_sampling_ratio/min": 0.6123162508010864, "sampling/sampling_logp_difference/max": 0.4905064105987549, "sampling/sampling_logp_difference/mean": 0.014662973582744598, "step": 693 }, { "clip_ratio/high_max": 0.01965065486729145, "clip_ratio/high_mean": 0.015236400999128819, "clip_ratio/low_mean": 0.012625781819224358, "clip_ratio/low_min": 0.0043668122962117195, "clip_ratio/region_mean": 0.0278621856123209, "entropy": 0.3050644099712372, "epoch": 4.7534246575342465, "grad_norm": 1.6817086115433497, "kl": 0.914946973323822, "learning_rate": 3.8133561643835615e-07, "loss": 0.0063, "step": 694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1014.0, "completions/max_terminated_length": 1014.0, "completions/mean_length": 175.62501525878906, "completions/mean_terminated_length": 175.62501525878906, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.3125859797000885, "epoch": 4.760273972602739, "frac_reward_zero_std": 0.0, "grad_norm": 4.968740227864545, "kl": 0.8691214919090271, "learning_rate": 3.811643835616438e-07, "loss": -0.0075, "num_tokens": 9319830.0, "reward": 0.6373070478439331, "reward_std": 0.06803847849369049, "rewards/check_gptzero_func/mean": 0.6373070478439331, "rewards/check_gptzero_func/std": 0.30350416898727417, "sampling/importance_sampling_ratio/max": 1.6011137962341309, "sampling/importance_sampling_ratio/mean": 0.998681902885437, "sampling/importance_sampling_ratio/min": 0.5455325841903687, "sampling/sampling_logp_difference/max": 0.6059927940368652, "sampling/sampling_logp_difference/mean": 0.016097579151391983, "step": 695 }, { "clip_ratio/high_max": 0.0239680428057909, "clip_ratio/high_mean": 0.01459044124931097, "clip_ratio/low_mean": 0.008878623135387897, "clip_ratio/low_min": 0.005197055172175169, "clip_ratio/region_mean": 0.023469064384698868, "entropy": 0.3128484785556793, "epoch": 4.767123287671232, "grad_norm": 1.5064804560193, "kl": 0.8679659962654114, "learning_rate": 3.809931506849315e-07, "loss": -0.0152, "step": 696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 203.50001525878906, "completions/mean_terminated_length": 203.50001525878906, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.42046087980270386, "epoch": 4.773972602739726, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8488585520454817, "kl": 0.8503796458244324, "learning_rate": 3.8082191780821916e-07, "loss": 0.0105, "num_tokens": 9335824.0, "reward": 0.5919691920280457, "reward_std": 0.047429148107767105, "rewards/check_gptzero_func/mean": 0.5919691324234009, "rewards/check_gptzero_func/std": 0.2933397591114044, "sampling/importance_sampling_ratio/max": 1.5477560758590698, "sampling/importance_sampling_ratio/mean": 0.9989146590232849, "sampling/importance_sampling_ratio/min": 0.6389032006263733, "sampling/sampling_logp_difference/max": 0.44800233840942383, "sampling/sampling_logp_difference/mean": 0.017413649708032608, "step": 697 }, { "clip_ratio/high_max": 0.01916932873427868, "clip_ratio/high_mean": 0.0155068663880229, "clip_ratio/low_mean": 0.011762939393520355, "clip_ratio/low_min": 0.007146941032260656, "clip_ratio/region_mean": 0.02726980671286583, "entropy": 0.41889986395835876, "epoch": 4.780821917808219, "grad_norm": 1.5153271208818406, "kl": 0.8575299382209778, "learning_rate": 3.806506849315068e-07, "loss": 0.0005, "step": 698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 902.0, "completions/max_terminated_length": 902.0, "completions/mean_length": 144.33929443359375, "completions/mean_terminated_length": 144.33929443359375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.2792784571647644, "epoch": 4.787671232876712, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.3223872838488475, "kl": 1.0083311796188354, "learning_rate": 3.8047945205479446e-07, "loss": 0.0033, "num_tokens": 9349189.0, "reward": 0.7137121558189392, "reward_std": 0.05483192950487137, "rewards/check_gptzero_func/mean": 0.7137121558189392, "rewards/check_gptzero_func/std": 0.2775980234146118, "sampling/importance_sampling_ratio/max": 1.7578809261322021, "sampling/importance_sampling_ratio/mean": 0.99994295835495, "sampling/importance_sampling_ratio/min": 0.5216038227081299, "sampling/sampling_logp_difference/max": 0.6508469581604004, "sampling/sampling_logp_difference/mean": 0.014140184968709946, "step": 699 }, { "clip_ratio/high_max": 0.02719406597316265, "clip_ratio/high_mean": 0.015167015604674816, "clip_ratio/low_mean": 0.011437477543950081, "clip_ratio/low_min": 0.0030911900103092194, "clip_ratio/region_mean": 0.02660449407994747, "entropy": 0.2801264226436615, "epoch": 4.794520547945205, "grad_norm": 1.646706603733576, "kl": 0.9336292147636414, "learning_rate": 3.803082191780822e-07, "loss": -0.0069, "step": 700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1093.0, "completions/max_terminated_length": 1093.0, "completions/mean_length": 176.23214721679688, "completions/mean_terminated_length": 176.23214721679688, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.39921000599861145, "epoch": 4.801369863013699, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.0846119914052728, "kl": 0.9069386124610901, "learning_rate": 3.8013698630136986e-07, "loss": -0.0258, "num_tokens": 9363456.0, "reward": 0.5474386215209961, "reward_std": 0.0564093217253685, "rewards/check_gptzero_func/mean": 0.5474386215209961, "rewards/check_gptzero_func/std": 0.34766173362731934, "sampling/importance_sampling_ratio/max": 1.8179681301116943, "sampling/importance_sampling_ratio/mean": 0.9999858140945435, "sampling/importance_sampling_ratio/min": 0.6087208986282349, "sampling/sampling_logp_difference/max": 0.5977194309234619, "sampling/sampling_logp_difference/mean": 0.01702868938446045, "step": 701 }, { "clip_ratio/high_max": 0.02678571455180645, "clip_ratio/high_mean": 0.01636587269604206, "clip_ratio/low_mean": 0.013705318793654442, "clip_ratio/low_min": 0.0057416269555687904, "clip_ratio/region_mean": 0.030071189627051353, "entropy": 0.40096139907836914, "epoch": 4.808219178082192, "grad_norm": 1.7393264339077976, "kl": 0.8896074295043945, "learning_rate": 3.799657534246575e-07, "loss": -0.0365, "step": 702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 706.0, "completions/max_terminated_length": 706.0, "completions/mean_length": 120.35714721679688, "completions/mean_terminated_length": 120.35714721679688, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.3325296938419342, "epoch": 4.815068493150685, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.371750914329683, "kl": 1.0466034412384033, "learning_rate": 3.797945205479452e-07, "loss": 0.0453, "num_tokens": 9374562.0, "reward": 0.5647859573364258, "reward_std": 0.08825047314167023, "rewards/check_gptzero_func/mean": 0.5647859573364258, "rewards/check_gptzero_func/std": 0.3471193313598633, "sampling/importance_sampling_ratio/max": 1.4916131496429443, "sampling/importance_sampling_ratio/mean": 1.0001987218856812, "sampling/importance_sampling_ratio/min": 0.6483188271522522, "sampling/sampling_logp_difference/max": 0.43337273597717285, "sampling/sampling_logp_difference/mean": 0.014604654163122177, "step": 703 }, { "clip_ratio/high_max": 0.021331945434212685, "clip_ratio/high_mean": 0.015459594316780567, "clip_ratio/low_mean": 0.01996701955795288, "clip_ratio/low_min": 0.01300728414207697, "clip_ratio/region_mean": 0.03542661294341087, "entropy": 0.333343505859375, "epoch": 4.821917808219178, "grad_norm": 1.9500587920984092, "kl": 1.049636960029602, "learning_rate": 3.7962328767123287e-07, "loss": 0.034, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/max_terminated_length": 490.0, "completions/mean_length": 111.5714340209961, "completions/mean_terminated_length": 111.5714340209961, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.2288762480020523, "epoch": 4.828767123287671, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.504439361757365, "kl": 0.9489203095436096, "learning_rate": 3.794520547945205e-07, "loss": 0.0223, "num_tokens": 9385734.0, "reward": 0.6387072801589966, "reward_std": 0.0893620103597641, "rewards/check_gptzero_func/mean": 0.638707160949707, "rewards/check_gptzero_func/std": 0.29901009798049927, "sampling/importance_sampling_ratio/max": 1.6878516674041748, "sampling/importance_sampling_ratio/mean": 1.0004783868789673, "sampling/importance_sampling_ratio/min": 0.6451036930084229, "sampling/sampling_logp_difference/max": 0.5234565734863281, "sampling/sampling_logp_difference/mean": 0.013006489723920822, "step": 705 }, { "clip_ratio/high_max": 0.03161397576332092, "clip_ratio/high_mean": 0.021129684522747993, "clip_ratio/low_mean": 0.021096521988511086, "clip_ratio/low_min": 0.008571428246796131, "clip_ratio/region_mean": 0.04222620278596878, "entropy": 0.22622700035572052, "epoch": 4.835616438356165, "grad_norm": 2.2176773467013438, "kl": 0.9854221343994141, "learning_rate": 3.7928082191780817e-07, "loss": 0.0119, "step": 706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.0, "completions/max_terminated_length": 282.0, "completions/mean_length": 97.51786041259766, "completions/mean_terminated_length": 97.51786041259766, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.2213972806930542, "epoch": 4.842465753424658, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.1473037721093235, "kl": 1.0345803499221802, "learning_rate": 3.7910958904109587e-07, "loss": 0.0112, "num_tokens": 9396077.0, "reward": 0.6606373190879822, "reward_std": 0.04208876192569733, "rewards/check_gptzero_func/mean": 0.6606373190879822, "rewards/check_gptzero_func/std": 0.33217254281044006, "sampling/importance_sampling_ratio/max": 1.481650710105896, "sampling/importance_sampling_ratio/mean": 1.0006238222122192, "sampling/importance_sampling_ratio/min": 0.6955021619796753, "sampling/sampling_logp_difference/max": 0.3931567668914795, "sampling/sampling_logp_difference/mean": 0.01181555911898613, "step": 707 }, { "clip_ratio/high_max": 0.031111111864447594, "clip_ratio/high_mean": 0.01654662750661373, "clip_ratio/low_mean": 0.016103899106383324, "clip_ratio/low_min": 0.00206611561588943, "clip_ratio/region_mean": 0.032650526612997055, "entropy": 0.21926213800907135, "epoch": 4.8493150684931505, "grad_norm": 1.4258462905097309, "kl": 1.0067445039749146, "learning_rate": 3.789383561643835e-07, "loss": 0.0022, "step": 708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 156.25, "completions/mean_terminated_length": 156.25, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.3720008432865143, "epoch": 4.8561643835616435, "frac_reward_zero_std": 0.0, "grad_norm": 2.4623020293221742, "kl": 1.166725993156433, "learning_rate": 3.787671232876712e-07, "loss": 0.0277, "num_tokens": 9409487.0, "reward": 0.5540298223495483, "reward_std": 0.09012462943792343, "rewards/check_gptzero_func/mean": 0.5540298223495483, "rewards/check_gptzero_func/std": 0.27719900012016296, "sampling/importance_sampling_ratio/max": 1.620957851409912, "sampling/importance_sampling_ratio/mean": 0.9997102618217468, "sampling/importance_sampling_ratio/min": 0.6124805212020874, "sampling/sampling_logp_difference/max": 0.4902381896972656, "sampling/sampling_logp_difference/mean": 0.01704002358019352, "step": 709 }, { "clip_ratio/high_max": 0.02567567490041256, "clip_ratio/high_mean": 0.019333088770508766, "clip_ratio/low_mean": 0.017950164154171944, "clip_ratio/low_min": 0.0091210613027215, "clip_ratio/region_mean": 0.03728324919939041, "entropy": 0.3728289008140564, "epoch": 4.863013698630137, "grad_norm": 5.258942748145617, "kl": 1.1453410387039185, "learning_rate": 3.7859589041095893e-07, "loss": 0.018, "step": 710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 818.0, "completions/max_terminated_length": 818.0, "completions/mean_length": 120.75000762939453, "completions/mean_terminated_length": 120.75000762939453, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.30793747305870056, "epoch": 4.86986301369863, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.33546190544425, "kl": 1.1708239316940308, "learning_rate": 3.784246575342466e-07, "loss": -0.0016, "num_tokens": 9420995.0, "reward": 0.5243759155273438, "reward_std": 0.10976542532444, "rewards/check_gptzero_func/mean": 0.5243759155273438, "rewards/check_gptzero_func/std": 0.3696919083595276, "sampling/importance_sampling_ratio/max": 1.4986677169799805, "sampling/importance_sampling_ratio/mean": 0.999649167060852, "sampling/importance_sampling_ratio/min": 0.6115652918815613, "sampling/sampling_logp_difference/max": 0.4917335510253906, "sampling/sampling_logp_difference/mean": 0.015668539330363274, "step": 711 }, { "clip_ratio/high_max": 0.029831387102603912, "clip_ratio/high_mean": 0.019721554592251778, "clip_ratio/low_mean": 0.012928813695907593, "clip_ratio/low_min": 0.009079118259251118, "clip_ratio/region_mean": 0.03265037015080452, "entropy": 0.31133022904396057, "epoch": 4.876712328767123, "grad_norm": 1.9585935016309552, "kl": 1.1471589803695679, "learning_rate": 3.7825342465753423e-07, "loss": -0.013, "step": 712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 144.17857360839844, "completions/mean_terminated_length": 144.17857360839844, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.2529354691505432, "epoch": 4.883561643835616, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.203680092172685, "kl": 1.0518542528152466, "learning_rate": 3.780821917808219e-07, "loss": 0.0126, "num_tokens": 9434233.0, "reward": 0.6049625277519226, "reward_std": 0.06695729494094849, "rewards/check_gptzero_func/mean": 0.6049624681472778, "rewards/check_gptzero_func/std": 0.3474498987197876, "sampling/importance_sampling_ratio/max": 1.6321437358856201, "sampling/importance_sampling_ratio/mean": 0.9993823766708374, "sampling/importance_sampling_ratio/min": 0.5679103136062622, "sampling/sampling_logp_difference/max": 0.5657917261123657, "sampling/sampling_logp_difference/mean": 0.012988651171326637, "step": 713 }, { "clip_ratio/high_max": 0.03188405930995941, "clip_ratio/high_mean": 0.0166168250143528, "clip_ratio/low_mean": 0.012450523674488068, "clip_ratio/low_min": 0.005988024175167084, "clip_ratio/region_mean": 0.029067346826195717, "entropy": 0.2562877833843231, "epoch": 4.890410958904109, "grad_norm": 1.4884952940315093, "kl": 0.9621108174324036, "learning_rate": 3.779109589041096e-07, "loss": 0.0034, "step": 714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 167.19644165039062, "completions/mean_terminated_length": 167.19644165039062, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.3986954092979431, "epoch": 4.897260273972603, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.1921279454059985, "kl": 0.8714989423751831, "learning_rate": 3.7773972602739723e-07, "loss": 0.0261, "num_tokens": 9448364.0, "reward": 0.5345606803894043, "reward_std": 0.09534052759408951, "rewards/check_gptzero_func/mean": 0.5345606207847595, "rewards/check_gptzero_func/std": 0.36368831992149353, "sampling/importance_sampling_ratio/max": 1.616026759147644, "sampling/importance_sampling_ratio/mean": 1.0000697374343872, "sampling/importance_sampling_ratio/min": 0.7023412585258484, "sampling/sampling_logp_difference/max": 0.47997045516967773, "sampling/sampling_logp_difference/mean": 0.016610348597168922, "step": 715 }, { "clip_ratio/high_max": 0.028875380754470825, "clip_ratio/high_mean": 0.016371289268136024, "clip_ratio/low_mean": 0.012643079273402691, "clip_ratio/low_min": 0.005249343812465668, "clip_ratio/region_mean": 0.02901436947286129, "entropy": 0.39949798583984375, "epoch": 4.904109589041096, "grad_norm": 1.7211549261760548, "kl": 0.8701573610305786, "learning_rate": 3.775684931506849e-07, "loss": 0.0145, "step": 716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/max_terminated_length": 516.0, "completions/mean_length": 105.05357360839844, "completions/mean_terminated_length": 105.05357360839844, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.3046114146709442, "epoch": 4.910958904109589, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 3.0447094109182724, "kl": 1.295285940170288, "learning_rate": 3.773972602739726e-07, "loss": -0.013, "num_tokens": 9459179.0, "reward": 0.511677622795105, "reward_std": 0.13456672430038452, "rewards/check_gptzero_func/mean": 0.511677622795105, "rewards/check_gptzero_func/std": 0.3174969255924225, "sampling/importance_sampling_ratio/max": 1.77751624584198, "sampling/importance_sampling_ratio/mean": 1.001107096672058, "sampling/importance_sampling_ratio/min": 0.5692121386528015, "sampling/sampling_logp_difference/max": 0.5752170085906982, "sampling/sampling_logp_difference/mean": 0.01663518324494362, "step": 717 }, { "clip_ratio/high_max": 0.0279720276594162, "clip_ratio/high_mean": 0.020134983584284782, "clip_ratio/low_mean": 0.022986361756920815, "clip_ratio/low_min": 0.010733452625572681, "clip_ratio/region_mean": 0.0431213453412056, "entropy": 0.3063836395740509, "epoch": 4.917808219178082, "grad_norm": 78.35841100079958, "kl": 2.5062859058380127, "learning_rate": 3.772260273972603e-07, "loss": -0.0131, "step": 718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 955.0, "completions/max_terminated_length": 955.0, "completions/mean_length": 123.76786041259766, "completions/mean_terminated_length": 123.76786041259766, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.3093772530555725, "epoch": 4.924657534246576, "frac_reward_zero_std": 0.0, "grad_norm": 2.3423041580233797, "kl": 1.0182304382324219, "learning_rate": 3.7705479452054794e-07, "loss": -0.0002, "num_tokens": 9471120.0, "reward": 0.5671192407608032, "reward_std": 0.08384653180837631, "rewards/check_gptzero_func/mean": 0.5671191811561584, "rewards/check_gptzero_func/std": 0.31950342655181885, "sampling/importance_sampling_ratio/max": 1.4960298538208008, "sampling/importance_sampling_ratio/mean": 0.99977046251297, "sampling/importance_sampling_ratio/min": 0.6194537281990051, "sampling/sampling_logp_difference/max": 0.47891736030578613, "sampling/sampling_logp_difference/mean": 0.014545788988471031, "step": 719 }, { "clip_ratio/high_max": 0.03168567642569542, "clip_ratio/high_mean": 0.022166280075907707, "clip_ratio/low_mean": 0.01694951020181179, "clip_ratio/low_min": 0.012690355069935322, "clip_ratio/region_mean": 0.039115794003009796, "entropy": 0.3109898269176483, "epoch": 4.931506849315069, "grad_norm": 1.997191221986201, "kl": 1.0104249715805054, "learning_rate": 3.768835616438356e-07, "loss": -0.0085, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 924.0, "completions/max_terminated_length": 924.0, "completions/mean_length": 161.17857360839844, "completions/mean_terminated_length": 161.17857360839844, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.27490997314453125, "epoch": 4.938356164383562, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.108446991383814, "kl": 0.9239077568054199, "learning_rate": 3.767123287671233e-07, "loss": -0.0118, "num_tokens": 9485006.0, "reward": 0.6969041228294373, "reward_std": 0.07443969696760178, "rewards/check_gptzero_func/mean": 0.6969041228294373, "rewards/check_gptzero_func/std": 0.33585450053215027, "sampling/importance_sampling_ratio/max": 1.9515862464904785, "sampling/importance_sampling_ratio/mean": 1.0000908374786377, "sampling/importance_sampling_ratio/min": 0.6131078600883484, "sampling/sampling_logp_difference/max": 0.668642520904541, "sampling/sampling_logp_difference/mean": 0.01277144718915224, "step": 721 }, { "clip_ratio/high_max": 0.020757021382451057, "clip_ratio/high_mean": 0.012418775819242, "clip_ratio/low_mean": 0.013271304778754711, "clip_ratio/low_min": 0.0018281536176800728, "clip_ratio/region_mean": 0.02569008246064186, "entropy": 0.273037850856781, "epoch": 4.945205479452055, "grad_norm": 10.38498855229083, "kl": 1.2140885591506958, "learning_rate": 3.7654109589041094e-07, "loss": -0.0199, "step": 722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 215.23214721679688, "completions/mean_terminated_length": 215.23214721679688, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.33150434494018555, "epoch": 4.9520547945205475, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.890074375057091, "kl": 0.7615768313407898, "learning_rate": 3.763698630136986e-07, "loss": 0.0208, "num_tokens": 9502287.0, "reward": 0.6303164958953857, "reward_std": 0.06649339199066162, "rewards/check_gptzero_func/mean": 0.6303165555000305, "rewards/check_gptzero_func/std": 0.3326883018016815, "sampling/importance_sampling_ratio/max": 1.4754077196121216, "sampling/importance_sampling_ratio/mean": 0.9995540976524353, "sampling/importance_sampling_ratio/min": 0.6280486583709717, "sampling/sampling_logp_difference/max": 0.4651377201080322, "sampling/sampling_logp_difference/mean": 0.014312086626887321, "step": 723 }, { "clip_ratio/high_max": 0.021443888545036316, "clip_ratio/high_mean": 0.014108727686107159, "clip_ratio/low_mean": 0.013247980736196041, "clip_ratio/low_min": 0.007654116488993168, "clip_ratio/region_mean": 0.02735671028494835, "entropy": 0.32941123843193054, "epoch": 4.958904109589041, "grad_norm": 1.5174850163982445, "kl": 0.766677737236023, "learning_rate": 3.761986301369863e-07, "loss": 0.0116, "step": 724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 197.25001525878906, "completions/mean_terminated_length": 197.25001525878906, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.3803732097148895, "epoch": 4.965753424657534, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.0445489321767902, "kl": 0.8564308881759644, "learning_rate": 3.7602739726027395e-07, "loss": 0.0133, "num_tokens": 9518123.0, "reward": 0.587533712387085, "reward_std": 0.12551312148571014, "rewards/check_gptzero_func/mean": 0.5875336527824402, "rewards/check_gptzero_func/std": 0.3097010850906372, "sampling/importance_sampling_ratio/max": 1.6012719869613647, "sampling/importance_sampling_ratio/mean": 0.9990471601486206, "sampling/importance_sampling_ratio/min": 0.623968243598938, "sampling/sampling_logp_difference/max": 0.47165584564208984, "sampling/sampling_logp_difference/mean": 0.017187004908919334, "step": 725 }, { "clip_ratio/high_max": 0.017391303554177284, "clip_ratio/high_mean": 0.013539610430598259, "clip_ratio/low_mean": 0.016246339306235313, "clip_ratio/low_min": 0.008611955679953098, "clip_ratio/region_mean": 0.029785949736833572, "entropy": 0.3782889246940613, "epoch": 4.972602739726027, "grad_norm": 1.8299109422023203, "kl": 0.8885704874992371, "learning_rate": 3.758561643835616e-07, "loss": 0.0027, "step": 726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1130.0, "completions/max_terminated_length": 1130.0, "completions/mean_length": 132.6607208251953, "completions/mean_terminated_length": 132.6607208251953, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.2867494523525238, "epoch": 4.97945205479452, "frac_reward_zero_std": 0.0, "grad_norm": 2.2847596665934926, "kl": 0.9633134007453918, "learning_rate": 3.756849315068493e-07, "loss": 0.0279, "num_tokens": 9530012.0, "reward": 0.7003718614578247, "reward_std": 0.0689680203795433, "rewards/check_gptzero_func/mean": 0.7003718018531799, "rewards/check_gptzero_func/std": 0.29927390813827515, "sampling/importance_sampling_ratio/max": 1.6978610754013062, "sampling/importance_sampling_ratio/mean": 0.9992013573646545, "sampling/importance_sampling_ratio/min": 0.5388728380203247, "sampling/sampling_logp_difference/max": 0.6182756423950195, "sampling/sampling_logp_difference/mean": 0.014723353087902069, "step": 727 }, { "clip_ratio/high_max": 0.026737967506051064, "clip_ratio/high_mean": 0.017100725322961807, "clip_ratio/low_mean": 0.016153905540704727, "clip_ratio/low_min": 0.0026041667442768812, "clip_ratio/region_mean": 0.033254627138376236, "entropy": 0.28417089581489563, "epoch": 4.986301369863014, "grad_norm": 1.999281161204624, "kl": 0.9853949546813965, "learning_rate": 3.75513698630137e-07, "loss": 0.018, "step": 728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 220.50001525878906, "completions/mean_terminated_length": 220.50001525878906, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.4193766415119171, "epoch": 4.993150684931507, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.941893872276891, "kl": 0.8053403496742249, "learning_rate": 3.7534246575342466e-07, "loss": 0.0059, "num_tokens": 9547370.0, "reward": 0.6308106780052185, "reward_std": 0.058235689997673035, "rewards/check_gptzero_func/mean": 0.6308106780052185, "rewards/check_gptzero_func/std": 0.32382094860076904, "sampling/importance_sampling_ratio/max": 1.678184151649475, "sampling/importance_sampling_ratio/mean": 0.9995317459106445, "sampling/importance_sampling_ratio/min": 0.6288557648658752, "sampling/sampling_logp_difference/max": 0.5177123546600342, "sampling/sampling_logp_difference/mean": 0.018032589927315712, "step": 729 }, { "clip_ratio/high_max": 0.02323838137090206, "clip_ratio/high_mean": 0.016569582745432854, "clip_ratio/low_mean": 0.011778625659644604, "clip_ratio/low_min": 0.007052185945212841, "clip_ratio/region_mean": 0.028348207473754883, "entropy": 0.4195895791053772, "epoch": 5.0, "grad_norm": 1.7342689896771877, "kl": 0.7921684980392456, "learning_rate": 3.751712328767123e-07, "loss": -0.0036, "step": 730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 229.6428680419922, "completions/mean_terminated_length": 229.6428680419922, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.40793830156326294, "epoch": 5.006849315068493, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7780507471426774, "kl": 0.7606124877929688, "learning_rate": 3.75e-07, "loss": 0.0404, "num_tokens": 9565168.0, "reward": 0.6074344515800476, "reward_std": 0.039323046803474426, "rewards/check_gptzero_func/mean": 0.6074343919754028, "rewards/check_gptzero_func/std": 0.273851603269577, "sampling/importance_sampling_ratio/max": 1.987719178199768, "sampling/importance_sampling_ratio/mean": 0.9997813105583191, "sampling/importance_sampling_ratio/min": 0.39415091276168823, "sampling/sampling_logp_difference/max": 0.9310214519500732, "sampling/sampling_logp_difference/mean": 0.01581752300262451, "step": 731 }, { "clip_ratio/high_max": 0.018413597717881203, "clip_ratio/high_mean": 0.010635266080498695, "clip_ratio/low_mean": 0.007889916189014912, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01852518320083618, "entropy": 0.4089077413082123, "epoch": 5.013698630136986, "grad_norm": 1.5037627047867272, "kl": 0.7578458786010742, "learning_rate": 3.7482876712328766e-07, "loss": 0.0308, "step": 732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 162.33929443359375, "completions/mean_terminated_length": 162.33929443359375, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.380716472864151, "epoch": 5.02054794520548, "frac_reward_zero_std": 1.0, "grad_norm": 0.30875515307239787, "kl": 0.9521262049674988, "learning_rate": 3.746575342465753e-07, "loss": 0.0084, "num_tokens": 9579319.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_gptzero_func/mean": 0.0, "rewards/check_gptzero_func/std": 0.0, "sampling/importance_sampling_ratio/max": 1.5943219661712646, "sampling/importance_sampling_ratio/mean": 1.0000301599502563, "sampling/importance_sampling_ratio/min": 0.4309800863265991, "sampling/sampling_logp_difference/max": 0.8416934013366699, "sampling/sampling_logp_difference/mean": 0.016333848237991333, "step": 733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "entropy": 0.3872165083885193, "epoch": 5.027397260273973, "grad_norm": 0.08602091734906081, "kl": 0.9074004292488098, "learning_rate": 3.7448630136986296e-07, "loss": 0.0079, "step": 734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 302.9285888671875, "completions/mean_terminated_length": 302.9285888671875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.5663076639175415, "epoch": 5.034246575342466, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7532649554390436, "kl": 0.6280479431152344, "learning_rate": 3.7431506849315066e-07, "loss": 0.0241, "num_tokens": 9601033.0, "reward": 0.5806462168693542, "reward_std": 0.08288934826850891, "rewards/check_gptzero_func/mean": 0.5806461572647095, "rewards/check_gptzero_func/std": 0.3106030523777008, "sampling/importance_sampling_ratio/max": 1.4368325471878052, "sampling/importance_sampling_ratio/mean": 1.0012414455413818, "sampling/importance_sampling_ratio/min": 0.6299540400505066, "sampling/sampling_logp_difference/max": 0.4621083736419678, "sampling/sampling_logp_difference/mean": 0.020765194669365883, "step": 735 }, { "clip_ratio/high_max": 0.01595214381814003, "clip_ratio/high_mean": 0.010941649787127972, "clip_ratio/low_mean": 0.010048219002783298, "clip_ratio/low_min": 0.004312668461352587, "clip_ratio/region_mean": 0.02098986878991127, "entropy": 0.5683208107948303, "epoch": 5.041095890410959, "grad_norm": 6.766812250840026, "kl": 0.5909820199012756, "learning_rate": 3.7414383561643837e-07, "loss": 0.0196, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1095.0, "completions/max_terminated_length": 1095.0, "completions/mean_length": 176.80357360839844, "completions/mean_terminated_length": 176.80357360839844, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.4355491101741791, "epoch": 5.0479452054794525, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.1034240036854293, "kl": 0.857422411441803, "learning_rate": 3.73972602739726e-07, "loss": -0.0238, "num_tokens": 9615538.0, "reward": 0.5625447034835815, "reward_std": 0.06611457467079163, "rewards/check_gptzero_func/mean": 0.5625447034835815, "rewards/check_gptzero_func/std": 0.2930700480937958, "sampling/importance_sampling_ratio/max": 1.5417358875274658, "sampling/importance_sampling_ratio/mean": 1.0007033348083496, "sampling/importance_sampling_ratio/min": 0.6392273902893066, "sampling/sampling_logp_difference/max": 0.4474949836730957, "sampling/sampling_logp_difference/mean": 0.017934650182724, "step": 737 }, { "clip_ratio/high_max": 0.026436781510710716, "clip_ratio/high_mean": 0.016766931861639023, "clip_ratio/low_mean": 0.01364800613373518, "clip_ratio/low_min": 0.007552870083600283, "clip_ratio/region_mean": 0.030414938926696777, "entropy": 0.43702903389930725, "epoch": 5.054794520547945, "grad_norm": 1.6491494653221443, "kl": 0.8376705050468445, "learning_rate": 3.7380136986301367e-07, "loss": -0.0338, "step": 738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 141.30357360839844, "completions/mean_terminated_length": 141.30357360839844, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.28349924087524414, "epoch": 5.061643835616438, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.1164462309485574, "kl": 0.8184328675270081, "learning_rate": 3.7363013698630137e-07, "loss": 0.0238, "num_tokens": 9628793.0, "reward": 0.6768900752067566, "reward_std": 0.08685829490423203, "rewards/check_gptzero_func/mean": 0.6768900156021118, "rewards/check_gptzero_func/std": 0.28912079334259033, "sampling/importance_sampling_ratio/max": 1.6308419704437256, "sampling/importance_sampling_ratio/mean": 1.0000587701797485, "sampling/importance_sampling_ratio/min": 0.644981324672699, "sampling/sampling_logp_difference/max": 0.48909640312194824, "sampling/sampling_logp_difference/mean": 0.013087720610201359, "step": 739 }, { "clip_ratio/high_max": 0.028761062771081924, "clip_ratio/high_mean": 0.01524051371961832, "clip_ratio/low_mean": 0.013561735861003399, "clip_ratio/low_min": 0.00325203244574368, "clip_ratio/region_mean": 0.02880225144326687, "entropy": 0.28339114785194397, "epoch": 5.068493150684931, "grad_norm": 1.645091228415114, "kl": 0.793299674987793, "learning_rate": 3.73458904109589e-07, "loss": 0.0132, "step": 740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/max_terminated_length": 314.0, "completions/mean_length": 106.96428680419922, "completions/mean_terminated_length": 106.96428680419922, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.2640988528728485, "epoch": 5.075342465753424, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.65836827877747, "kl": 1.041565179824829, "learning_rate": 3.7328767123287667e-07, "loss": 0.0113, "num_tokens": 9640071.0, "reward": 0.6620277762413025, "reward_std": 0.09876486659049988, "rewards/check_gptzero_func/mean": 0.6620277762413025, "rewards/check_gptzero_func/std": 0.3495173454284668, "sampling/importance_sampling_ratio/max": 1.6432304382324219, "sampling/importance_sampling_ratio/mean": 1.0005249977111816, "sampling/importance_sampling_ratio/min": 0.6368654370307922, "sampling/sampling_logp_difference/max": 0.49666404724121094, "sampling/sampling_logp_difference/mean": 0.014200509525835514, "step": 741 }, { "clip_ratio/high_max": 0.028833551332354546, "clip_ratio/high_mean": 0.019558457657694817, "clip_ratio/low_mean": 0.02146509289741516, "clip_ratio/low_min": 0.013840830884873867, "clip_ratio/region_mean": 0.04102355241775513, "entropy": 0.2616736888885498, "epoch": 5.082191780821918, "grad_norm": 2.4707904956162343, "kl": 1.0334975719451904, "learning_rate": 3.731164383561644e-07, "loss": 0.0008, "step": 742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 187.92857360839844, "completions/mean_terminated_length": 187.92857360839844, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.3280600905418396, "epoch": 5.089041095890411, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.9034813389925067, "kl": 0.8649128675460815, "learning_rate": 3.72945205479452e-07, "loss": 0.0334, "num_tokens": 9656043.0, "reward": 0.660110592842102, "reward_std": 0.14306306838989258, "rewards/check_gptzero_func/mean": 0.6601105332374573, "rewards/check_gptzero_func/std": 0.35009729862213135, "sampling/importance_sampling_ratio/max": 1.8530226945877075, "sampling/importance_sampling_ratio/mean": 1.0000553131103516, "sampling/importance_sampling_ratio/min": 0.6170511841773987, "sampling/sampling_logp_difference/max": 0.6168181896209717, "sampling/sampling_logp_difference/mean": 0.015426883473992348, "step": 743 }, { "clip_ratio/high_max": 0.019354838877916336, "clip_ratio/high_mean": 0.014850690960884094, "clip_ratio/low_mean": 0.012970621697604656, "clip_ratio/low_min": 0.006097560748457909, "clip_ratio/region_mean": 0.027821313589811325, "entropy": 0.3266904652118683, "epoch": 5.095890410958904, "grad_norm": 1.619885767313514, "kl": 0.8783426284790039, "learning_rate": 3.727739726027397e-07, "loss": 0.0237, "step": 744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 220.5357208251953, "completions/mean_terminated_length": 220.5357208251953, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.4018241763114929, "epoch": 5.102739726027397, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.8336056852682328, "kl": 0.8735682368278503, "learning_rate": 3.726027397260274e-07, "loss": -0.0134, "num_tokens": 9672685.0, "reward": 0.5425415635108948, "reward_std": 0.10430758446455002, "rewards/check_gptzero_func/mean": 0.5425415635108948, "rewards/check_gptzero_func/std": 0.364370197057724, "sampling/importance_sampling_ratio/max": 1.500982403755188, "sampling/importance_sampling_ratio/mean": 1.0001837015151978, "sampling/importance_sampling_ratio/min": 0.5689173340797424, "sampling/sampling_logp_difference/max": 0.5640201568603516, "sampling/sampling_logp_difference/mean": 0.016745232045650482, "step": 745 }, { "clip_ratio/high_max": 0.020467836409807205, "clip_ratio/high_mean": 0.014387060888111591, "clip_ratio/low_mean": 0.009131737984716892, "clip_ratio/low_min": 0.007007708307355642, "clip_ratio/region_mean": 0.023518797010183334, "entropy": 0.40416184067726135, "epoch": 5.109589041095891, "grad_norm": 1.5595935518525434, "kl": 0.8723235726356506, "learning_rate": 3.724315068493151e-07, "loss": -0.0238, "step": 746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1183.0, "completions/max_terminated_length": 1183.0, "completions/mean_length": 186.30357360839844, "completions/mean_terminated_length": 186.30357360839844, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.47076818346977234, "epoch": 5.116438356164384, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.068062600152577, "kl": 0.9359673261642456, "learning_rate": 3.7226027397260273e-07, "loss": 0.0183, "num_tokens": 9687828.0, "reward": 0.6747405529022217, "reward_std": 0.08895605057477951, "rewards/check_gptzero_func/mean": 0.6747404932975769, "rewards/check_gptzero_func/std": 0.28340208530426025, "sampling/importance_sampling_ratio/max": 1.5637216567993164, "sampling/importance_sampling_ratio/mean": 1.0001777410507202, "sampling/importance_sampling_ratio/min": 0.5260628461837769, "sampling/sampling_logp_difference/max": 0.6423345804214478, "sampling/sampling_logp_difference/mean": 0.019415736198425293, "step": 747 }, { "clip_ratio/high_max": 0.02841918356716633, "clip_ratio/high_mean": 0.015566169284284115, "clip_ratio/low_mean": 0.012374588288366795, "clip_ratio/low_min": 0.0069605568423867226, "clip_ratio/region_mean": 0.02794075571000576, "entropy": 0.4706858992576599, "epoch": 5.123287671232877, "grad_norm": 1.719535102282386, "kl": 0.9299575686454773, "learning_rate": 3.720890410958904e-07, "loss": 0.0074, "step": 748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 818.0, "completions/max_terminated_length": 818.0, "completions/mean_length": 135.42857360839844, "completions/mean_terminated_length": 135.42857360839844, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.27171897888183594, "epoch": 5.13013698630137, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.07230000362694, "kl": 0.8986356854438782, "learning_rate": 3.719178082191781e-07, "loss": -0.0045, "num_tokens": 9700110.0, "reward": 0.7059551477432251, "reward_std": 0.07255786657333374, "rewards/check_gptzero_func/mean": 0.7059550881385803, "rewards/check_gptzero_func/std": 0.3376048803329468, "sampling/importance_sampling_ratio/max": 1.549249291419983, "sampling/importance_sampling_ratio/mean": 1.0000768899917603, "sampling/importance_sampling_ratio/min": 0.6197354197502136, "sampling/sampling_logp_difference/max": 0.47846269607543945, "sampling/sampling_logp_difference/mean": 0.01277938298881054, "step": 749 }, { "clip_ratio/high_max": 0.02066420577466488, "clip_ratio/high_mean": 0.015822676941752434, "clip_ratio/low_mean": 0.01268392987549305, "clip_ratio/low_min": 0.0033613445702940226, "clip_ratio/region_mean": 0.028506606817245483, "entropy": 0.26953330636024475, "epoch": 5.136986301369863, "grad_norm": 1.7024560197491896, "kl": 0.914847195148468, "learning_rate": 3.7174657534246574e-07, "loss": -0.0148, "step": 750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 678.0, "completions/max_terminated_length": 678.0, "completions/mean_length": 115.78572082519531, "completions/mean_terminated_length": 115.78572082519531, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.29465562105178833, "epoch": 5.1438356164383565, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.32700579972188, "kl": 0.8877751231193542, "learning_rate": 3.715753424657534e-07, "loss": 0.0126, "num_tokens": 9711546.0, "reward": 0.6292468905448914, "reward_std": 0.07785024493932724, "rewards/check_gptzero_func/mean": 0.6292468309402466, "rewards/check_gptzero_func/std": 0.3731510639190674, "sampling/importance_sampling_ratio/max": 1.5277611017227173, "sampling/importance_sampling_ratio/mean": 1.0001276731491089, "sampling/importance_sampling_ratio/min": 0.6148075461387634, "sampling/sampling_logp_difference/max": 0.4864460229873657, "sampling/sampling_logp_difference/mean": 0.013460460118949413, "step": 751 }, { "clip_ratio/high_max": 0.02642706222832203, "clip_ratio/high_mean": 0.017101265490055084, "clip_ratio/low_mean": 0.01840732805430889, "clip_ratio/low_min": 0.005190311465412378, "clip_ratio/region_mean": 0.035508591681718826, "entropy": 0.2925746738910675, "epoch": 5.1506849315068495, "grad_norm": 2.0310751597954657, "kl": 0.8875383734703064, "learning_rate": 3.7140410958904104e-07, "loss": 0.0022, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1122.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 170.2678680419922, "completions/mean_terminated_length": 170.2678680419922, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.39383745193481445, "epoch": 5.157534246575342, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.0486712400080656, "kl": 0.8861178159713745, "learning_rate": 3.7123287671232874e-07, "loss": 0.0326, "num_tokens": 9726149.0, "reward": 0.5682447552680969, "reward_std": 0.07424339652061462, "rewards/check_gptzero_func/mean": 0.5682447552680969, "rewards/check_gptzero_func/std": 0.34132951498031616, "sampling/importance_sampling_ratio/max": 1.5971448421478271, "sampling/importance_sampling_ratio/mean": 0.9996369481086731, "sampling/importance_sampling_ratio/min": 0.6919125914573669, "sampling/sampling_logp_difference/max": 0.4682176113128662, "sampling/sampling_logp_difference/mean": 0.01686929166316986, "step": 753 }, { "clip_ratio/high_max": 0.021830985322594643, "clip_ratio/high_mean": 0.01582561619579792, "clip_ratio/low_mean": 0.012409943155944347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.028235558420419693, "entropy": 0.39295801520347595, "epoch": 5.164383561643835, "grad_norm": 2.041958610828298, "kl": 0.8849133849143982, "learning_rate": 3.7106164383561644e-07, "loss": 0.0224, "step": 754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 202.50001525878906, "completions/mean_terminated_length": 202.50001525878906, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.4307587742805481, "epoch": 5.171232876712328, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9882440064441003, "kl": 0.8548944592475891, "learning_rate": 3.708904109589041e-07, "loss": 0.0032, "num_tokens": 9742545.0, "reward": 0.5597397685050964, "reward_std": 0.08807244151830673, "rewards/check_gptzero_func/mean": 0.5597397089004517, "rewards/check_gptzero_func/std": 0.27116596698760986, "sampling/importance_sampling_ratio/max": 1.6027462482452393, "sampling/importance_sampling_ratio/mean": 0.9997811317443848, "sampling/importance_sampling_ratio/min": 0.5793067216873169, "sampling/sampling_logp_difference/max": 0.5459232330322266, "sampling/sampling_logp_difference/mean": 0.018645426258444786, "step": 755 }, { "clip_ratio/high_max": 0.028513237833976746, "clip_ratio/high_mean": 0.019922317937016487, "clip_ratio/low_mean": 0.01326140109449625, "clip_ratio/low_min": 0.007310871034860611, "clip_ratio/region_mean": 0.03318371996283531, "entropy": 0.4332803189754486, "epoch": 5.178082191780822, "grad_norm": 1.5428624089679617, "kl": 0.8466886878013611, "learning_rate": 3.707191780821918e-07, "loss": -0.0073, "step": 756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 832.0, "completions/max_terminated_length": 832.0, "completions/mean_length": 157.375, "completions/mean_terminated_length": 157.375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.3782098591327667, "epoch": 5.184931506849315, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.3441798770692652, "kl": 0.9205150604248047, "learning_rate": 3.7054794520547945e-07, "loss": 0.0501, "num_tokens": 9756070.0, "reward": 0.572066068649292, "reward_std": 0.052327048033475876, "rewards/check_gptzero_func/mean": 0.5720661282539368, "rewards/check_gptzero_func/std": 0.34045180678367615, "sampling/importance_sampling_ratio/max": 1.4606589078903198, "sampling/importance_sampling_ratio/mean": 1.0001846551895142, "sampling/importance_sampling_ratio/min": 0.5991135835647583, "sampling/sampling_logp_difference/max": 0.5123040676116943, "sampling/sampling_logp_difference/mean": 0.01621195301413536, "step": 757 }, { "clip_ratio/high_max": 0.023115577176213264, "clip_ratio/high_mean": 0.01581856794655323, "clip_ratio/low_mean": 0.013581722043454647, "clip_ratio/low_min": 0.00788436271250248, "clip_ratio/region_mean": 0.029400287196040154, "entropy": 0.3807210326194763, "epoch": 5.191780821917808, "grad_norm": 2.0192277227023525, "kl": 0.8881157636642456, "learning_rate": 3.703767123287671e-07, "loss": 0.0391, "step": 758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 939.0, "completions/max_terminated_length": 939.0, "completions/mean_length": 130.05357360839844, "completions/mean_terminated_length": 130.05357360839844, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.32313355803489685, "epoch": 5.198630136986301, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.889123782273249, "kl": 1.1285135746002197, "learning_rate": 3.7020547945205475e-07, "loss": 0.0193, "num_tokens": 9768207.0, "reward": 0.574177622795105, "reward_std": 0.05106533318758011, "rewards/check_gptzero_func/mean": 0.574177622795105, "rewards/check_gptzero_func/std": 0.3161463737487793, "sampling/importance_sampling_ratio/max": 1.4369633197784424, "sampling/importance_sampling_ratio/mean": 0.9998955130577087, "sampling/importance_sampling_ratio/min": 0.6622815728187561, "sampling/sampling_logp_difference/max": 0.41206443309783936, "sampling/sampling_logp_difference/mean": 0.014565419405698776, "step": 759 }, { "clip_ratio/high_max": 0.028387097641825676, "clip_ratio/high_mean": 0.018720654770731926, "clip_ratio/low_mean": 0.014435947872698307, "clip_ratio/low_min": 0.004524887073785067, "clip_ratio/region_mean": 0.03315660357475281, "entropy": 0.32680433988571167, "epoch": 5.205479452054795, "grad_norm": 1.9774509644911076, "kl": 0.9026566743850708, "learning_rate": 3.7003424657534245e-07, "loss": 0.0089, "step": 760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 275.9107360839844, "completions/mean_terminated_length": 275.9107360839844, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.4553506672382355, "epoch": 5.212328767123288, "frac_reward_zero_std": 0.0, "grad_norm": 1.664479908233567, "kl": 0.6474387049674988, "learning_rate": 3.698630136986301e-07, "loss": -0.031, "num_tokens": 9788374.0, "reward": 0.6183808445930481, "reward_std": 0.06088326871395111, "rewards/check_gptzero_func/mean": 0.6183808445930481, "rewards/check_gptzero_func/std": 0.32637009024620056, "sampling/importance_sampling_ratio/max": 1.413801670074463, "sampling/importance_sampling_ratio/mean": 0.9998377561569214, "sampling/importance_sampling_ratio/min": 0.5812225341796875, "sampling/sampling_logp_difference/max": 0.5426216125488281, "sampling/sampling_logp_difference/mean": 0.017812654376029968, "step": 761 }, { "clip_ratio/high_max": 0.03207236900925636, "clip_ratio/high_mean": 0.016419794410467148, "clip_ratio/low_mean": 0.009332435205578804, "clip_ratio/low_min": 0.006137322634458542, "clip_ratio/region_mean": 0.025752229616045952, "entropy": 0.4592776894569397, "epoch": 5.219178082191781, "grad_norm": 1.6555734003045735, "kl": 0.6195319294929504, "learning_rate": 3.6969178082191775e-07, "loss": -0.0405, "step": 762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 255.00001525878906, "completions/mean_terminated_length": 255.00001525878906, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.4423768222332001, "epoch": 5.226027397260274, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7276906080164474, "kl": 0.9416056275367737, "learning_rate": 3.695205479452055e-07, "loss": 0.0231, "num_tokens": 9807520.0, "reward": 0.4789026379585266, "reward_std": 0.0910295695066452, "rewards/check_gptzero_func/mean": 0.4789026081562042, "rewards/check_gptzero_func/std": 0.2941383421421051, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.001312494277954, "sampling/importance_sampling_ratio/min": 0.6394056081771851, "sampling/sampling_logp_difference/max": 0.7287653684616089, "sampling/sampling_logp_difference/mean": 0.019288968294858932, "step": 763 }, { "clip_ratio/high_max": 0.015006821602582932, "clip_ratio/high_mean": 0.0101157296448946, "clip_ratio/low_mean": 0.01302303932607174, "clip_ratio/low_min": 0.006156119052320719, "clip_ratio/region_mean": 0.02313877083361149, "entropy": 0.44206398725509644, "epoch": 5.232876712328767, "grad_norm": 1.5924005785487765, "kl": 0.9431816339492798, "learning_rate": 3.6934931506849316e-07, "loss": 0.0131, "step": 764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 161.9107208251953, "completions/mean_terminated_length": 161.9107208251953, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.4127993583679199, "epoch": 5.239726027397261, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.190205368374907, "kl": 0.9602909088134766, "learning_rate": 3.691780821917808e-07, "loss": -0.0021, "num_tokens": 9821453.0, "reward": 0.5396552085876465, "reward_std": 0.11015090346336365, "rewards/check_gptzero_func/mean": 0.5396551489830017, "rewards/check_gptzero_func/std": 0.3419610857963562, "sampling/importance_sampling_ratio/max": 1.483607530593872, "sampling/importance_sampling_ratio/mean": 1.0001943111419678, "sampling/importance_sampling_ratio/min": 0.633033812046051, "sampling/sampling_logp_difference/max": 0.4572315216064453, "sampling/sampling_logp_difference/mean": 0.01758922077715397, "step": 765 }, { "clip_ratio/high_max": 0.02369668334722519, "clip_ratio/high_mean": 0.01674283854663372, "clip_ratio/low_mean": 0.014386826194822788, "clip_ratio/low_min": 0.008231807500123978, "clip_ratio/region_mean": 0.031129663810133934, "entropy": 0.4166423976421356, "epoch": 5.2465753424657535, "grad_norm": 1.8032729631432312, "kl": 0.9109545350074768, "learning_rate": 3.6900684931506846e-07, "loss": -0.0127, "step": 766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1514.0, "completions/max_terminated_length": 1514.0, "completions/mean_length": 140.75, "completions/mean_terminated_length": 140.75, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.3128121495246887, "epoch": 5.2534246575342465, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.345978869328876, "kl": 1.0339090824127197, "learning_rate": 3.6883561643835616e-07, "loss": 0.0293, "num_tokens": 9833793.0, "reward": 0.6226686835289001, "reward_std": 0.15454840660095215, "rewards/check_gptzero_func/mean": 0.6226686835289001, "rewards/check_gptzero_func/std": 0.2880636155605316, "sampling/importance_sampling_ratio/max": 1.4353173971176147, "sampling/importance_sampling_ratio/mean": 1.0000433921813965, "sampling/importance_sampling_ratio/min": 0.6064277291297913, "sampling/sampling_logp_difference/max": 0.5001697540283203, "sampling/sampling_logp_difference/mean": 0.014963973313570023, "step": 767 }, { "clip_ratio/high_max": 0.025099074468016624, "clip_ratio/high_mean": 0.01675335131585598, "clip_ratio/low_mean": 0.012872260995209217, "clip_ratio/low_min": 0.004201680887490511, "clip_ratio/region_mean": 0.02962561324238777, "entropy": 0.31453937292099, "epoch": 5.260273972602739, "grad_norm": 1.7911509401698424, "kl": 1.036436676979065, "learning_rate": 3.686643835616438e-07, "loss": 0.0176, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 803.0, "completions/max_terminated_length": 803.0, "completions/mean_length": 134.08929443359375, "completions/mean_terminated_length": 134.08929443359375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.3276616632938385, "epoch": 5.267123287671233, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.3575726601468294, "kl": 1.013445258140564, "learning_rate": 3.6849315068493147e-07, "loss": 0.0302, "num_tokens": 9845466.0, "reward": 0.6469688415527344, "reward_std": 0.076780766248703, "rewards/check_gptzero_func/mean": 0.6469688415527344, "rewards/check_gptzero_func/std": 0.34713369607925415, "sampling/importance_sampling_ratio/max": 1.5230382680892944, "sampling/importance_sampling_ratio/mean": 0.9997847676277161, "sampling/importance_sampling_ratio/min": 0.6182060837745667, "sampling/sampling_logp_difference/max": 0.48093342781066895, "sampling/sampling_logp_difference/mean": 0.015054743736982346, "step": 769 }, { "clip_ratio/high_max": 0.022360248491168022, "clip_ratio/high_mean": 0.015197926200926304, "clip_ratio/low_mean": 0.01360479649156332, "clip_ratio/low_min": 0.004889975767582655, "clip_ratio/region_mean": 0.028802722692489624, "entropy": 0.3282018303871155, "epoch": 5.273972602739726, "grad_norm": 2.9224994467098413, "kl": 0.9589834213256836, "learning_rate": 3.683219178082191e-07, "loss": 0.0201, "step": 770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1088.0, "completions/max_terminated_length": 1088.0, "completions/mean_length": 156.3928680419922, "completions/mean_terminated_length": 156.3928680419922, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.3199401795864105, "epoch": 5.280821917808219, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.094758308019362, "kl": 0.9160540699958801, "learning_rate": 3.681506849315068e-07, "loss": 0.0248, "num_tokens": 9859344.0, "reward": 0.7003781795501709, "reward_std": 0.07127363234758377, "rewards/check_gptzero_func/mean": 0.7003781199455261, "rewards/check_gptzero_func/std": 0.37163904309272766, "sampling/importance_sampling_ratio/max": 1.5249971151351929, "sampling/importance_sampling_ratio/mean": 0.9995724558830261, "sampling/importance_sampling_ratio/min": 0.718036413192749, "sampling/sampling_logp_difference/max": 0.42199254035949707, "sampling/sampling_logp_difference/mean": 0.013435257598757744, "step": 771 }, { "clip_ratio/high_max": 0.03949446976184845, "clip_ratio/high_mean": 0.01636197790503502, "clip_ratio/low_mean": 0.014934190548956394, "clip_ratio/low_min": 0.0014367816038429737, "clip_ratio/region_mean": 0.03129616752266884, "entropy": 0.31792783737182617, "epoch": 5.287671232876712, "grad_norm": 2.3139555126985436, "kl": 0.970866858959198, "learning_rate": 3.679794520547945e-07, "loss": 0.0153, "step": 772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 196.7857208251953, "completions/mean_terminated_length": 196.7857208251953, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.4279080927371979, "epoch": 5.294520547945205, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9581708387966548, "kl": 0.7587724328041077, "learning_rate": 3.6780821917808217e-07, "loss": -0.023, "num_tokens": 9875010.0, "reward": 0.53397536277771, "reward_std": 0.1103995144367218, "rewards/check_gptzero_func/mean": 0.5339753031730652, "rewards/check_gptzero_func/std": 0.36747434735298157, "sampling/importance_sampling_ratio/max": 1.4252873659133911, "sampling/importance_sampling_ratio/mean": 1.0004299879074097, "sampling/importance_sampling_ratio/min": 0.6636648178100586, "sampling/sampling_logp_difference/max": 0.4099780321121216, "sampling/sampling_logp_difference/mean": 0.01689441129565239, "step": 773 }, { "clip_ratio/high_max": 0.020642202347517014, "clip_ratio/high_mean": 0.014923910610377789, "clip_ratio/low_mean": 0.010087205097079277, "clip_ratio/low_min": 0.00505902199074626, "clip_ratio/region_mean": 0.02501111663877964, "entropy": 0.4285281002521515, "epoch": 5.301369863013699, "grad_norm": 1.5972953912941619, "kl": 0.7299056649208069, "learning_rate": 3.676369863013699e-07, "loss": -0.0334, "step": 774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1083.0, "completions/mean_length": 190.35714721679688, "completions/mean_terminated_length": 139.27272033691406, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.3375980257987976, "epoch": 5.308219178082192, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.869382785850905, "kl": 0.9451435804367065, "learning_rate": 3.6746575342465753e-07, "loss": 0.1735, "num_tokens": 9890764.0, "reward": 0.6499366760253906, "reward_std": 0.10421988368034363, "rewards/check_gptzero_func/mean": 0.6499366760253906, "rewards/check_gptzero_func/std": 0.36213767528533936, "sampling/importance_sampling_ratio/max": 1.6266926527023315, "sampling/importance_sampling_ratio/mean": 1.0007078647613525, "sampling/importance_sampling_ratio/min": 0.5678802132606506, "sampling/sampling_logp_difference/max": 0.5658447742462158, "sampling/sampling_logp_difference/mean": 0.014758319593966007, "step": 775 }, { "clip_ratio/high_max": 0.024468084797263145, "clip_ratio/high_mean": 0.014995947480201721, "clip_ratio/low_mean": 0.01564526930451393, "clip_ratio/low_min": 0.010309278033673763, "clip_ratio/region_mean": 0.030641216784715652, "entropy": 0.33943334221839905, "epoch": 5.315068493150685, "grad_norm": 2.0948537208952747, "kl": 0.8729971051216125, "learning_rate": 3.672945205479452e-07, "loss": 0.1657, "step": 776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 914.0, "completions/max_terminated_length": 914.0, "completions/mean_length": 156.5, "completions/mean_terminated_length": 156.5, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.37611716985702515, "epoch": 5.321917808219178, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.2013049792001347, "kl": 1.0886156558990479, "learning_rate": 3.6712328767123283e-07, "loss": 0.0103, "num_tokens": 9904286.0, "reward": 0.63649582862854, "reward_std": 0.07816994190216064, "rewards/check_gptzero_func/mean": 0.6364957690238953, "rewards/check_gptzero_func/std": 0.32088702917099, "sampling/importance_sampling_ratio/max": 1.6176550388336182, "sampling/importance_sampling_ratio/mean": 1.0006898641586304, "sampling/importance_sampling_ratio/min": 0.6259439587593079, "sampling/sampling_logp_difference/max": 0.48097753524780273, "sampling/sampling_logp_difference/mean": 0.017888838425278664, "step": 777 }, { "clip_ratio/high_max": 0.025510204955935478, "clip_ratio/high_mean": 0.01707073114812374, "clip_ratio/low_mean": 0.01588810607790947, "clip_ratio/low_min": 0.010303377173841, "clip_ratio/region_mean": 0.03295883908867836, "entropy": 0.3765891194343567, "epoch": 5.328767123287671, "grad_norm": 1.8847359621799884, "kl": 1.0770996809005737, "learning_rate": 3.6695205479452053e-07, "loss": -0.001, "step": 778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 944.0, "completions/max_terminated_length": 944.0, "completions/mean_length": 182.82144165039062, "completions/mean_terminated_length": 182.82144165039062, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.44570785760879517, "epoch": 5.335616438356165, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.011217276245374, "kl": 0.8463754653930664, "learning_rate": 3.667808219178082e-07, "loss": 0.0087, "num_tokens": 9919552.0, "reward": 0.5682305097579956, "reward_std": 0.09045989066362381, "rewards/check_gptzero_func/mean": 0.5682304501533508, "rewards/check_gptzero_func/std": 0.35422733426094055, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0003173351287842, "sampling/importance_sampling_ratio/min": 0.6147480607032776, "sampling/sampling_logp_difference/max": 0.7330436706542969, "sampling/sampling_logp_difference/mean": 0.01858655922114849, "step": 779 }, { "clip_ratio/high_max": 0.021297192201018333, "clip_ratio/high_mean": 0.01580885984003544, "clip_ratio/low_mean": 0.01360623724758625, "clip_ratio/low_min": 0.005635024048388004, "clip_ratio/region_mean": 0.02941509708762169, "entropy": 0.44739022850990295, "epoch": 5.342465753424658, "grad_norm": 2.002119129988526, "kl": 0.796737790107727, "learning_rate": 3.666095890410959e-07, "loss": -0.0012, "step": 780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 148.4107208251953, "completions/mean_terminated_length": 148.4107208251953, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.35768580436706543, "epoch": 5.3493150684931505, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.2895681270739376, "kl": 0.9593043327331543, "learning_rate": 3.664383561643836e-07, "loss": 0.0352, "num_tokens": 9932673.0, "reward": 0.637297511100769, "reward_std": 0.1225898489356041, "rewards/check_gptzero_func/mean": 0.637297511100769, "rewards/check_gptzero_func/std": 0.36593008041381836, "sampling/importance_sampling_ratio/max": 1.6297928094863892, "sampling/importance_sampling_ratio/mean": 1.0007222890853882, "sampling/importance_sampling_ratio/min": 0.5504357814788818, "sampling/sampling_logp_difference/max": 0.5970449447631836, "sampling/sampling_logp_difference/mean": 0.016271300613880157, "step": 781 }, { "clip_ratio/high_max": 0.02500000037252903, "clip_ratio/high_mean": 0.017712611705064774, "clip_ratio/low_mean": 0.015016615390777588, "clip_ratio/low_min": 0.006359300576150417, "clip_ratio/region_mean": 0.03272922709584236, "entropy": 0.3576567769050598, "epoch": 5.3561643835616435, "grad_norm": 1.8693706258192444, "kl": 0.9782935976982117, "learning_rate": 3.6626712328767124e-07, "loss": 0.0239, "step": 782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 970.0, "completions/max_terminated_length": 970.0, "completions/mean_length": 159.0357208251953, "completions/mean_terminated_length": 159.0357208251953, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.28530052304267883, "epoch": 5.363013698630137, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 2.092882979827154, "kl": 0.7967061996459961, "learning_rate": 3.660958904109589e-07, "loss": 0.0399, "num_tokens": 9946161.0, "reward": 0.6989007592201233, "reward_std": 0.05959783494472504, "rewards/check_gptzero_func/mean": 0.6989006996154785, "rewards/check_gptzero_func/std": 0.33494681119918823, "sampling/importance_sampling_ratio/max": 1.474100947380066, "sampling/importance_sampling_ratio/mean": 1.0003068447113037, "sampling/importance_sampling_ratio/min": 0.4161491394042969, "sampling/sampling_logp_difference/max": 0.8767116069793701, "sampling/sampling_logp_difference/mean": 0.012865650467574596, "step": 783 }, { "clip_ratio/high_max": 0.01923076994717121, "clip_ratio/high_mean": 0.011297019198536873, "clip_ratio/low_mean": 0.011856941506266594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.023153960704803467, "entropy": 0.2850783169269562, "epoch": 5.36986301369863, "grad_norm": 1.5052101015490524, "kl": 0.8003177642822266, "learning_rate": 3.6592465753424654e-07, "loss": 0.0298, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 956.0, "completions/max_terminated_length": 956.0, "completions/mean_length": 155.83929443359375, "completions/mean_terminated_length": 155.83929443359375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.3415128290653229, "epoch": 5.376712328767123, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.0894513149342733, "kl": 0.9482752680778503, "learning_rate": 3.6575342465753424e-07, "loss": 0.001, "num_tokens": 9959246.0, "reward": 0.6492621898651123, "reward_std": 0.08652929216623306, "rewards/check_gptzero_func/mean": 0.6492621302604675, "rewards/check_gptzero_func/std": 0.34761396050453186, "sampling/importance_sampling_ratio/max": 1.550584316253662, "sampling/importance_sampling_ratio/mean": 1.0003396272659302, "sampling/importance_sampling_ratio/min": 0.6143982410430908, "sampling/sampling_logp_difference/max": 0.48711204528808594, "sampling/sampling_logp_difference/mean": 0.014518167823553085, "step": 785 }, { "clip_ratio/high_max": 0.023705003783106804, "clip_ratio/high_mean": 0.01453372836112976, "clip_ratio/low_mean": 0.009900202974677086, "clip_ratio/low_min": 0.0016949152341112494, "clip_ratio/region_mean": 0.024433929473161697, "entropy": 0.34214216470718384, "epoch": 5.383561643835616, "grad_norm": 1.6582092117691007, "kl": 0.9404706954956055, "learning_rate": 3.655821917808219e-07, "loss": -0.0102, "step": 786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1708.0, "completions/max_terminated_length": 1708.0, "completions/mean_length": 149.6607208251953, "completions/mean_terminated_length": 149.6607208251953, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.3196355700492859, "epoch": 5.390410958904109, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.368303475353587, "kl": 1.324372410774231, "learning_rate": 3.6541095890410954e-07, "loss": 0.0116, "num_tokens": 9972135.0, "reward": 0.6110037565231323, "reward_std": 0.07677780091762543, "rewards/check_gptzero_func/mean": 0.6110037565231323, "rewards/check_gptzero_func/std": 0.3655637204647064, "sampling/importance_sampling_ratio/max": 1.6107885837554932, "sampling/importance_sampling_ratio/mean": 0.9994202852249146, "sampling/importance_sampling_ratio/min": 0.4309265613555908, "sampling/sampling_logp_difference/max": 0.8418176174163818, "sampling/sampling_logp_difference/mean": 0.013907359912991524, "step": 787 }, { "clip_ratio/high_max": 0.023076923564076424, "clip_ratio/high_mean": 0.0166624803096056, "clip_ratio/low_mean": 0.009999281726777554, "clip_ratio/low_min": 0.0028653296176344156, "clip_ratio/region_mean": 0.026661762967705727, "entropy": 0.32179492712020874, "epoch": 5.397260273972603, "grad_norm": 1.802214047995351, "kl": 1.0973091125488281, "learning_rate": 3.6523972602739725e-07, "loss": 0.0001, "step": 788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 205.42857360839844, "completions/mean_terminated_length": 205.42857360839844, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.3655531108379364, "epoch": 5.404109589041096, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9711194609640525, "kl": 0.8733779191970825, "learning_rate": 3.6506849315068495e-07, "loss": 0.0194, "num_tokens": 9988443.0, "reward": 0.6335744261741638, "reward_std": 0.06355442851781845, "rewards/check_gptzero_func/mean": 0.6335744261741638, "rewards/check_gptzero_func/std": 0.31322938203811646, "sampling/importance_sampling_ratio/max": 1.7476297616958618, "sampling/importance_sampling_ratio/mean": 1.0007940530776978, "sampling/importance_sampling_ratio/min": 0.6391705870628357, "sampling/sampling_logp_difference/max": 0.558260440826416, "sampling/sampling_logp_difference/mean": 0.01458166353404522, "step": 789 }, { "clip_ratio/high_max": 0.018564356490969658, "clip_ratio/high_mean": 0.01095071155577898, "clip_ratio/low_mean": 0.011336694471538067, "clip_ratio/low_min": 0.004914004821330309, "clip_ratio/region_mean": 0.022287407889962196, "entropy": 0.3649047911167145, "epoch": 5.410958904109589, "grad_norm": 1.604084616970772, "kl": 0.8591457009315491, "learning_rate": 3.648972602739726e-07, "loss": 0.0084, "step": 790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 995.0, "completions/max_terminated_length": 995.0, "completions/mean_length": 178.85714721679688, "completions/mean_terminated_length": 178.85714721679688, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.391783207654953, "epoch": 5.417808219178082, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9892417248384586, "kl": 0.8792260885238647, "learning_rate": 3.6472602739726025e-07, "loss": 0.0136, "num_tokens": 10003169.0, "reward": 0.5759463310241699, "reward_std": 0.10376670211553574, "rewards/check_gptzero_func/mean": 0.5759463310241699, "rewards/check_gptzero_func/std": 0.34075793623924255, "sampling/importance_sampling_ratio/max": 1.6573729515075684, "sampling/importance_sampling_ratio/mean": 1.0001909732818604, "sampling/importance_sampling_ratio/min": 0.610870361328125, "sampling/sampling_logp_difference/max": 0.5052337646484375, "sampling/sampling_logp_difference/mean": 0.017101818695664406, "step": 791 }, { "clip_ratio/high_max": 0.026350460946559906, "clip_ratio/high_mean": 0.014949550852179527, "clip_ratio/low_mean": 0.014025171287357807, "clip_ratio/low_min": 0.009348914958536625, "clip_ratio/region_mean": 0.02897472120821476, "entropy": 0.391783207654953, "epoch": 5.424657534246576, "grad_norm": 1.6158558378028545, "kl": 0.8810811638832092, "learning_rate": 3.6455479452054795e-07, "loss": 0.0025, "step": 792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 209.8928680419922, "completions/mean_terminated_length": 209.8928680419922, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.41469642519950867, "epoch": 5.431506849315069, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.214137836324333, "kl": 0.8291907906532288, "learning_rate": 3.643835616438356e-07, "loss": 0.0125, "num_tokens": 10019695.0, "reward": 0.6990090012550354, "reward_std": 0.06254611909389496, "rewards/check_gptzero_func/mean": 0.6990088820457458, "rewards/check_gptzero_func/std": 0.3088633716106415, "sampling/importance_sampling_ratio/max": 1.5746201276779175, "sampling/importance_sampling_ratio/mean": 0.999271810054779, "sampling/importance_sampling_ratio/min": 0.6347111463546753, "sampling/sampling_logp_difference/max": 0.45458531379699707, "sampling/sampling_logp_difference/mean": 0.01602335274219513, "step": 793 }, { "clip_ratio/high_max": 0.013649136759340763, "clip_ratio/high_mean": 0.01081375777721405, "clip_ratio/low_mean": 0.007301590405404568, "clip_ratio/low_min": 0.0030120480805635452, "clip_ratio/region_mean": 0.018115345388650894, "entropy": 0.4164714217185974, "epoch": 5.438356164383562, "grad_norm": 1.5974388427823731, "kl": 0.7444027066230774, "learning_rate": 3.6421232876712325e-07, "loss": 0.0023, "step": 794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1171.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 181.35714721679688, "completions/mean_terminated_length": 181.35714721679688, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.417708158493042, "epoch": 5.445205479452055, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.9438930236990604, "kl": 0.884731113910675, "learning_rate": 3.6404109589041096e-07, "loss": 0.0541, "num_tokens": 10034631.0, "reward": 0.6105044484138489, "reward_std": 0.12288656085729599, "rewards/check_gptzero_func/mean": 0.6105044484138489, "rewards/check_gptzero_func/std": 0.31916388869285583, "sampling/importance_sampling_ratio/max": 1.6010768413543701, "sampling/importance_sampling_ratio/mean": 1.0000592470169067, "sampling/importance_sampling_ratio/min": 0.6121135354042053, "sampling/sampling_logp_difference/max": 0.49083757400512695, "sampling/sampling_logp_difference/mean": 0.017028771340847015, "step": 795 }, { "clip_ratio/high_max": 0.02087286487221718, "clip_ratio/high_mean": 0.014234274625778198, "clip_ratio/low_mean": 0.012592137791216373, "clip_ratio/low_min": 0.007590132765471935, "clip_ratio/region_mean": 0.026826411485671997, "entropy": 0.41707828640937805, "epoch": 5.4520547945205475, "grad_norm": 1.6974520406008902, "kl": 0.8876372575759888, "learning_rate": 3.638698630136986e-07, "loss": 0.043, "step": 796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 582.0, "completions/max_terminated_length": 582.0, "completions/mean_length": 105.53572082519531, "completions/mean_terminated_length": 105.53572082519531, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.2915568947792053, "epoch": 5.458904109589041, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.2164954532364294, "kl": 0.9904333353042603, "learning_rate": 3.6369863013698626e-07, "loss": 0.0198, "num_tokens": 10045243.0, "reward": 0.6028718948364258, "reward_std": 0.04521416872739792, "rewards/check_gptzero_func/mean": 0.6028718948364258, "rewards/check_gptzero_func/std": 0.33513104915618896, "sampling/importance_sampling_ratio/max": 1.505447268486023, "sampling/importance_sampling_ratio/mean": 0.9990344047546387, "sampling/importance_sampling_ratio/min": 0.6502124071121216, "sampling/sampling_logp_difference/max": 0.4304562211036682, "sampling/sampling_logp_difference/mean": 0.01471659354865551, "step": 797 }, { "clip_ratio/high_max": 0.04137931019067764, "clip_ratio/high_mean": 0.021722128614783287, "clip_ratio/low_mean": 0.01309011410921812, "clip_ratio/low_min": 0.00455580884590745, "clip_ratio/region_mean": 0.03481224551796913, "entropy": 0.2938135266304016, "epoch": 5.465753424657534, "grad_norm": 1.9402734997264315, "kl": 0.9928279519081116, "learning_rate": 3.6352739726027396e-07, "loss": 0.0096, "step": 798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1266.0, "completions/max_terminated_length": 1266.0, "completions/mean_length": 283.2857360839844, "completions/mean_terminated_length": 283.2857360839844, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.6295213103294373, "epoch": 5.472602739726027, "frac_reward_zero_std": 0.0, "grad_norm": 1.7021698629353268, "kl": 0.6587641835212708, "learning_rate": 3.6335616438356167e-07, "loss": 0.0163, "num_tokens": 10066165.0, "reward": 0.5423160791397095, "reward_std": 0.10660456866025925, "rewards/check_gptzero_func/mean": 0.5423160195350647, "rewards/check_gptzero_func/std": 0.2609923183917999, "sampling/importance_sampling_ratio/max": 1.5425554513931274, "sampling/importance_sampling_ratio/mean": 0.9998467564582825, "sampling/importance_sampling_ratio/min": 0.4602356255054474, "sampling/sampling_logp_difference/max": 0.7760167121887207, "sampling/sampling_logp_difference/mean": 0.02140697091817856, "step": 799 }, { "clip_ratio/high_max": 0.01549463625997305, "clip_ratio/high_mean": 0.011595338582992554, "clip_ratio/low_mean": 0.007582328747957945, "clip_ratio/low_min": 0.0036008229944854975, "clip_ratio/region_mean": 0.019177665933966637, "entropy": 0.6309532523155212, "epoch": 5.47945205479452, "grad_norm": 1.5609941985416285, "kl": 0.6531429290771484, "learning_rate": 3.631849315068493e-07, "loss": 0.0062, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1063.0, "completions/max_terminated_length": 1063.0, "completions/mean_length": 186.1428680419922, "completions/mean_terminated_length": 186.1428680419922, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.3389703631401062, "epoch": 5.486301369863014, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.875913232818228, "kl": 0.9194695353507996, "learning_rate": 3.6301369863013697e-07, "loss": -0.0336, "num_tokens": 10081709.0, "reward": 0.6641942262649536, "reward_std": 0.09937628358602524, "rewards/check_gptzero_func/mean": 0.6641941666603088, "rewards/check_gptzero_func/std": 0.2831772267818451, "sampling/importance_sampling_ratio/max": 1.4433261156082153, "sampling/importance_sampling_ratio/mean": 1.0002163648605347, "sampling/importance_sampling_ratio/min": 0.6787719130516052, "sampling/sampling_logp_difference/max": 0.3874701261520386, "sampling/sampling_logp_difference/mean": 0.015655728057026863, "step": 801 }, { "clip_ratio/high_max": 0.018648019060492516, "clip_ratio/high_mean": 0.01315753161907196, "clip_ratio/low_mean": 0.010817165486514568, "clip_ratio/low_min": 0.0053956834599375725, "clip_ratio/region_mean": 0.023974696174263954, "entropy": 0.33934980630874634, "epoch": 5.493150684931507, "grad_norm": 1.5275461761226268, "kl": 0.9085630178451538, "learning_rate": 3.628424657534246e-07, "loss": -0.0442, "step": 802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1022.0, "completions/max_terminated_length": 1022.0, "completions/mean_length": 240.7857208251953, "completions/mean_terminated_length": 240.7857208251953, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.44365209341049194, "epoch": 5.5, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.6905676962413545, "kl": 0.7640876770019531, "learning_rate": 3.626712328767123e-07, "loss": 0.0322, "num_tokens": 10100689.0, "reward": 0.5781663656234741, "reward_std": 0.10403653979301453, "rewards/check_gptzero_func/mean": 0.5781663656234741, "rewards/check_gptzero_func/std": 0.28461727499961853, "sampling/importance_sampling_ratio/max": 1.6629855632781982, "sampling/importance_sampling_ratio/mean": 1.0004631280899048, "sampling/importance_sampling_ratio/min": 0.6291624903678894, "sampling/sampling_logp_difference/max": 0.5086145401000977, "sampling/sampling_logp_difference/mean": 0.017691995948553085, "step": 803 }, { "clip_ratio/high_max": 0.015027322806417942, "clip_ratio/high_mean": 0.012078036554157734, "clip_ratio/low_mean": 0.008694611489772797, "clip_ratio/low_min": 0.005571030545979738, "clip_ratio/region_mean": 0.020772648975253105, "entropy": 0.44325417280197144, "epoch": 5.506849315068493, "grad_norm": 1.4718707878907393, "kl": 0.7666873335838318, "learning_rate": 3.6249999999999997e-07, "loss": 0.0223, "step": 804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 241.75001525878906, "completions/mean_terminated_length": 241.75001525878906, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.4476093351840973, "epoch": 5.513698630136986, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.7558459784160947, "kl": 0.8308919668197632, "learning_rate": 3.623287671232876e-07, "loss": -0.0008, "num_tokens": 10119345.0, "reward": 0.6092221140861511, "reward_std": 0.10076430439949036, "rewards/check_gptzero_func/mean": 0.6092220544815063, "rewards/check_gptzero_func/std": 0.3393411636352539, "sampling/importance_sampling_ratio/max": 1.5735645294189453, "sampling/importance_sampling_ratio/mean": 1.0002039670944214, "sampling/importance_sampling_ratio/min": 0.6151832938194275, "sampling/sampling_logp_difference/max": 0.48583507537841797, "sampling/sampling_logp_difference/mean": 0.016166849061846733, "step": 805 }, { "clip_ratio/high_max": 0.01607717014849186, "clip_ratio/high_mean": 0.010882863774895668, "clip_ratio/low_mean": 0.006680260878056288, "clip_ratio/low_min": 0.0034305318258702755, "clip_ratio/region_mean": 0.017563125118613243, "entropy": 0.44923368096351624, "epoch": 5.52054794520548, "grad_norm": 1.5371415175295071, "kl": 0.7501874566078186, "learning_rate": 3.621575342465753e-07, "loss": -0.0108, "step": 806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 220.9107208251953, "completions/mean_terminated_length": 220.9107208251953, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.45116037130355835, "epoch": 5.527397260273973, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 17.129797632467156, "kl": 4.760258674621582, "learning_rate": 3.6198630136986303e-07, "loss": 0.0843, "num_tokens": 10136644.0, "reward": 0.5525644421577454, "reward_std": 0.07771046459674835, "rewards/check_gptzero_func/mean": 0.5525644421577454, "rewards/check_gptzero_func/std": 0.34127211570739746, "sampling/importance_sampling_ratio/max": 1.8569053411483765, "sampling/importance_sampling_ratio/mean": 0.999944806098938, "sampling/importance_sampling_ratio/min": 0.6062721610069275, "sampling/sampling_logp_difference/max": 0.6189112663269043, "sampling/sampling_logp_difference/mean": 0.017567956820130348, "step": 807 }, { "clip_ratio/high_max": 0.010033444501459599, "clip_ratio/high_mean": 0.004854331258684397, "clip_ratio/low_mean": 0.003334873588755727, "clip_ratio/low_min": 0.0015898251440376043, "clip_ratio/region_mean": 0.008189204148948193, "entropy": 0.45332351326942444, "epoch": 5.534246575342466, "grad_norm": 1.7922421581357655, "kl": 1.0052303075790405, "learning_rate": 3.618150684931507e-07, "loss": 0.0647, "step": 808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 892.0, "completions/max_terminated_length": 892.0, "completions/mean_length": 105.00000762939453, "completions/mean_terminated_length": 105.00000762939453, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.31868425011634827, "epoch": 5.541095890410959, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 2.50644957347206, "kl": 0.9710536599159241, "learning_rate": 3.6164383561643833e-07, "loss": -0.0429, "num_tokens": 10146786.0, "reward": 0.6509725451469421, "reward_std": 0.08995477110147476, "rewards/check_gptzero_func/mean": 0.6509724855422974, "rewards/check_gptzero_func/std": 0.3373197019100189, "sampling/importance_sampling_ratio/max": 1.6331247091293335, "sampling/importance_sampling_ratio/mean": 0.9993941187858582, "sampling/importance_sampling_ratio/min": 0.5668323040008545, "sampling/sampling_logp_difference/max": 0.5676918029785156, "sampling/sampling_logp_difference/mean": 0.012637085281312466, "step": 809 }, { "clip_ratio/high_max": 0.0309563297778368, "clip_ratio/high_mean": 0.018222829326987267, "clip_ratio/low_mean": 0.014375874772667885, "clip_ratio/low_min": 0.004746835213154554, "clip_ratio/region_mean": 0.03259870782494545, "entropy": 0.3199727237224579, "epoch": 5.5479452054794525, "grad_norm": 1.9670164159449826, "kl": 0.9436951875686646, "learning_rate": 3.6147260273972603e-07, "loss": -0.0546, "step": 810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 685.0, "completions/max_terminated_length": 685.0, "completions/mean_length": 159.2857208251953, "completions/mean_terminated_length": 159.2857208251953, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.36063912510871887, "epoch": 5.554794520547945, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 2.193604961985757, "kl": 0.8274809718132019, "learning_rate": 3.613013698630137e-07, "loss": 0.0131, "num_tokens": 10160722.0, "reward": 0.6643030047416687, "reward_std": 0.085960753262043, "rewards/check_gptzero_func/mean": 0.6643029451370239, "rewards/check_gptzero_func/std": 0.3196704685688019, "sampling/importance_sampling_ratio/max": 1.6502361297607422, "sampling/importance_sampling_ratio/mean": 0.9999763369560242, "sampling/importance_sampling_ratio/min": 0.40564608573913574, "sampling/sampling_logp_difference/max": 0.9022742509841919, "sampling/sampling_logp_difference/mean": 0.014895143918693066, "step": 811 }, { "clip_ratio/high_max": 0.024985473603010178, "clip_ratio/high_mean": 0.015840914100408554, "clip_ratio/low_mean": 0.012756682932376862, "clip_ratio/low_min": 0.002277904422953725, "clip_ratio/region_mean": 0.028597597032785416, "entropy": 0.36255350708961487, "epoch": 5.561643835616438, "grad_norm": 1.9449323356422408, "kl": 0.7948831915855408, "learning_rate": 3.6113013698630133e-07, "loss": 0.0033, "step": 812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1050.0, "completions/max_terminated_length": 1050.0, "completions/mean_length": 283.5535888671875, "completions/mean_terminated_length": 283.5535888671875, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.4967065155506134, "epoch": 5.568493150684931, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.007245835278997, "kl": 0.7372958064079285, "learning_rate": 3.6095890410958904e-07, "loss": 0.0124, "num_tokens": 10181261.0, "reward": 0.5716364979743958, "reward_std": 0.09514226764440536, "rewards/check_gptzero_func/mean": 0.5716364979743958, "rewards/check_gptzero_func/std": 0.3407149910926819, "sampling/importance_sampling_ratio/max": 1.6214115619659424, "sampling/importance_sampling_ratio/mean": 0.999923050403595, "sampling/importance_sampling_ratio/min": 0.4034159779548645, "sampling/sampling_logp_difference/max": 0.9077870845794678, "sampling/sampling_logp_difference/mean": 0.019351400434970856, "step": 813 }, { "clip_ratio/high_max": 0.017117613926529884, "clip_ratio/high_mean": 0.009465379640460014, "clip_ratio/low_mean": 0.006492596585303545, "clip_ratio/low_min": 0.002436647191643715, "clip_ratio/region_mean": 0.015957975760102272, "entropy": 0.4977279007434845, "epoch": 5.575342465753424, "grad_norm": 1.9094307394325238, "kl": 0.6299653649330139, "learning_rate": 3.607876712328767e-07, "loss": 0.0046, "step": 814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 185.6428680419922, "completions/mean_terminated_length": 185.6428680419922, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.32295769453048706, "epoch": 5.582191780821918, "frac_reward_zero_std": 0.0, "grad_norm": 1.7378551724257603, "kl": 0.8194909691810608, "learning_rate": 3.6061643835616434e-07, "loss": 0.0379, "num_tokens": 10196245.0, "reward": 0.6740065813064575, "reward_std": 0.055776916444301605, "rewards/check_gptzero_func/mean": 0.6740065217018127, "rewards/check_gptzero_func/std": 0.3313392102718353, "sampling/importance_sampling_ratio/max": 1.435862421989441, "sampling/importance_sampling_ratio/mean": 1.0001577138900757, "sampling/importance_sampling_ratio/min": 0.6547118425369263, "sampling/sampling_logp_difference/max": 0.42356014251708984, "sampling/sampling_logp_difference/mean": 0.012809855863451958, "step": 815 }, { "clip_ratio/high_max": 0.018929151818156242, "clip_ratio/high_mean": 0.009205281734466553, "clip_ratio/low_mean": 0.008885408751666546, "clip_ratio/low_min": 0.0020920501556247473, "clip_ratio/region_mean": 0.018090691417455673, "entropy": 0.3217506408691406, "epoch": 5.589041095890411, "grad_norm": 1.533102678717719, "kl": 0.8270029425621033, "learning_rate": 3.6044520547945204e-07, "loss": 0.0278, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 588.0, "completions/max_terminated_length": 588.0, "completions/mean_length": 136.98214721679688, "completions/mean_terminated_length": 136.98214721679688, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.36146339774131775, "epoch": 5.595890410958904, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.3446360007854343, "kl": 1.0992457866668701, "learning_rate": 3.6027397260273974e-07, "loss": -0.0211, "num_tokens": 10208640.0, "reward": 0.6035236716270447, "reward_std": 0.12403013557195663, "rewards/check_gptzero_func/mean": 0.6035236120223999, "rewards/check_gptzero_func/std": 0.36024364829063416, "sampling/importance_sampling_ratio/max": 1.4864006042480469, "sampling/importance_sampling_ratio/mean": 1.0006062984466553, "sampling/importance_sampling_ratio/min": 0.652746319770813, "sampling/sampling_logp_difference/max": 0.4265667200088501, "sampling/sampling_logp_difference/mean": 0.016928862780332565, "step": 817 }, { "clip_ratio/high_max": 0.03658536449074745, "clip_ratio/high_mean": 0.02035636082291603, "clip_ratio/low_mean": 0.012066175229847431, "clip_ratio/low_min": 0.003846153849735856, "clip_ratio/region_mean": 0.03242253512144089, "entropy": 0.36452335119247437, "epoch": 5.602739726027397, "grad_norm": 1.8087408260326288, "kl": 1.061833143234253, "learning_rate": 3.601027397260274e-07, "loss": -0.0316, "step": 818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1097.0, "completions/mean_length": 157.32144165039062, "completions/mean_terminated_length": 105.63636016845703, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.3030731678009033, "epoch": 5.609589041095891, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.8750252658635984, "kl": 0.900043785572052, "learning_rate": 3.5993150684931504e-07, "loss": -0.1461, "num_tokens": 10222562.0, "reward": 0.7557867765426636, "reward_std": 0.06782937049865723, "rewards/check_gptzero_func/mean": 0.7557867765426636, "rewards/check_gptzero_func/std": 0.3066542446613312, "sampling/importance_sampling_ratio/max": 1.7866661548614502, "sampling/importance_sampling_ratio/mean": 1.000247836112976, "sampling/importance_sampling_ratio/min": 0.6160274744033813, "sampling/sampling_logp_difference/max": 0.5803513526916504, "sampling/sampling_logp_difference/mean": 0.012663963250815868, "step": 819 }, { "clip_ratio/high_max": 0.03160083293914795, "clip_ratio/high_mean": 0.013192717917263508, "clip_ratio/low_mean": 0.013390375301241875, "clip_ratio/low_min": 0.0034602077212184668, "clip_ratio/region_mean": 0.026583092287182808, "entropy": 0.3027532398700714, "epoch": 5.616438356164384, "grad_norm": 2.1175935913799533, "kl": 0.91205894947052, "learning_rate": 3.5976027397260275e-07, "loss": -0.1548, "step": 820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 209.4107208251953, "completions/mean_terminated_length": 209.4107208251953, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.48908090591430664, "epoch": 5.623287671232877, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8353627067063434, "kl": 0.7968677878379822, "learning_rate": 3.595890410958904e-07, "loss": 0.0132, "num_tokens": 10238579.0, "reward": 0.5610113739967346, "reward_std": 0.09017891436815262, "rewards/check_gptzero_func/mean": 0.5610113739967346, "rewards/check_gptzero_func/std": 0.34113484621047974, "sampling/importance_sampling_ratio/max": 1.6806414127349854, "sampling/importance_sampling_ratio/mean": 0.9992262721061707, "sampling/importance_sampling_ratio/min": 0.643915057182312, "sampling/sampling_logp_difference/max": 0.5191755294799805, "sampling/sampling_logp_difference/mean": 0.01907963491976261, "step": 821 }, { "clip_ratio/high_max": 0.01994796097278595, "clip_ratio/high_mean": 0.013438515365123749, "clip_ratio/low_mean": 0.011157048866152763, "clip_ratio/low_min": 0.0067447638139128685, "clip_ratio/region_mean": 0.024595562368631363, "entropy": 0.4901421368122101, "epoch": 5.63013698630137, "grad_norm": 1.5586964094595086, "kl": 0.7899470329284668, "learning_rate": 3.5941780821917805e-07, "loss": 0.0026, "step": 822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/max_terminated_length": 489.0, "completions/mean_length": 105.14286041259766, "completions/mean_terminated_length": 105.14286041259766, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.246307373046875, "epoch": 5.636986301369863, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 2.0946094125898895, "kl": 0.8544702529907227, "learning_rate": 3.592465753424657e-07, "loss": 0.0409, "num_tokens": 10249879.0, "reward": 0.6921407580375671, "reward_std": 0.11101138591766357, "rewards/check_gptzero_func/mean": 0.6921406984329224, "rewards/check_gptzero_func/std": 0.3456612825393677, "sampling/importance_sampling_ratio/max": 1.5071558952331543, "sampling/importance_sampling_ratio/mean": 0.9989995360374451, "sampling/importance_sampling_ratio/min": 0.6716532111167908, "sampling/sampling_logp_difference/max": 0.4102243185043335, "sampling/sampling_logp_difference/mean": 0.011886811815202236, "step": 823 }, { "clip_ratio/high_max": 0.023286759853363037, "clip_ratio/high_mean": 0.015674244612455368, "clip_ratio/low_mean": 0.01753646321594715, "clip_ratio/low_min": 0.010463378392159939, "clip_ratio/region_mean": 0.03321070596575737, "entropy": 0.24632693827152252, "epoch": 5.6438356164383565, "grad_norm": 1.4626457227242327, "kl": 0.8360530138015747, "learning_rate": 3.590753424657534e-07, "loss": 0.0313, "step": 824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1806.0, "completions/max_terminated_length": 1806.0, "completions/mean_length": 209.3928680419922, "completions/mean_terminated_length": 209.3928680419922, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.5124940276145935, "epoch": 5.6506849315068495, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.053452515312975, "kl": 0.8407009840011597, "learning_rate": 3.589041095890411e-07, "loss": -0.0179, "num_tokens": 10266535.0, "reward": 0.6384110450744629, "reward_std": 0.09874259680509567, "rewards/check_gptzero_func/mean": 0.6384110450744629, "rewards/check_gptzero_func/std": 0.3408324420452118, "sampling/importance_sampling_ratio/max": 1.5278915166854858, "sampling/importance_sampling_ratio/mean": 0.9998582601547241, "sampling/importance_sampling_ratio/min": 0.41120457649230957, "sampling/sampling_logp_difference/max": 0.8886644840240479, "sampling/sampling_logp_difference/mean": 0.01929224468767643, "step": 825 }, { "clip_ratio/high_max": 0.019354838877916336, "clip_ratio/high_mean": 0.014473280869424343, "clip_ratio/low_mean": 0.009714224375784397, "clip_ratio/low_min": 0.006275645457208157, "clip_ratio/region_mean": 0.02418750524520874, "entropy": 0.5143934488296509, "epoch": 5.657534246575342, "grad_norm": 1.7446396743050527, "kl": 0.7799150347709656, "learning_rate": 3.5873287671232876e-07, "loss": -0.0275, "step": 826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 175.6428680419922, "completions/mean_terminated_length": 175.6428680419922, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.374610960483551, "epoch": 5.664383561643835, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.870880882491077, "kl": 0.865655243396759, "learning_rate": 3.5856164383561646e-07, "loss": -0.0066, "num_tokens": 10281269.0, "reward": 0.6965763568878174, "reward_std": 0.08054081350564957, "rewards/check_gptzero_func/mean": 0.6965762972831726, "rewards/check_gptzero_func/std": 0.29095059633255005, "sampling/importance_sampling_ratio/max": 1.6007356643676758, "sampling/importance_sampling_ratio/mean": 0.9998103380203247, "sampling/importance_sampling_ratio/min": 0.37776753306388855, "sampling/sampling_logp_difference/max": 0.9734762907028198, "sampling/sampling_logp_difference/mean": 0.014641237445175648, "step": 827 }, { "clip_ratio/high_max": 0.022047244012355804, "clip_ratio/high_mean": 0.013346712104976177, "clip_ratio/low_mean": 0.009682876989245415, "clip_ratio/low_min": 0.0035587188322097063, "clip_ratio/region_mean": 0.023029591888189316, "entropy": 0.37586331367492676, "epoch": 5.671232876712329, "grad_norm": 1.6028916935003001, "kl": 0.8866562247276306, "learning_rate": 3.583904109589041e-07, "loss": -0.017, "step": 828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1063.0, "completions/max_terminated_length": 1063.0, "completions/mean_length": 167.71429443359375, "completions/mean_terminated_length": 167.71429443359375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.3639907240867615, "epoch": 5.678082191780822, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.3458929542979012, "kl": 0.8861311674118042, "learning_rate": 3.5821917808219176e-07, "loss": -0.0185, "num_tokens": 10295757.0, "reward": 0.7098857164382935, "reward_std": 0.05539762228727341, "rewards/check_gptzero_func/mean": 0.7098856568336487, "rewards/check_gptzero_func/std": 0.3079080581665039, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9999136924743652, "sampling/importance_sampling_ratio/min": 0.40567389130592346, "sampling/sampling_logp_difference/max": 0.9022057056427002, "sampling/sampling_logp_difference/mean": 0.014681312255561352, "step": 829 }, { "clip_ratio/high_max": 0.018722467124462128, "clip_ratio/high_mean": 0.011481146328151226, "clip_ratio/low_mean": 0.00844769086688757, "clip_ratio/low_min": 0.00136239780113101, "clip_ratio/region_mean": 0.019928839057683945, "entropy": 0.36535099148750305, "epoch": 5.684931506849315, "grad_norm": 10.127707312080313, "kl": 0.8348504900932312, "learning_rate": 3.580479452054794e-07, "loss": -0.0203, "step": 830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 152.92857360839844, "completions/mean_terminated_length": 152.92857360839844, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.3895290791988373, "epoch": 5.691780821917808, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.151620479529184, "kl": 0.8695670366287231, "learning_rate": 3.578767123287671e-07, "loss": 0.0211, "num_tokens": 10309453.0, "reward": 0.707614004611969, "reward_std": 0.06243716925382614, "rewards/check_gptzero_func/mean": 0.7076139450073242, "rewards/check_gptzero_func/std": 0.2948280870914459, "sampling/importance_sampling_ratio/max": 1.5277611017227173, "sampling/importance_sampling_ratio/mean": 1.0004962682724, "sampling/importance_sampling_ratio/min": 0.6419297456741333, "sampling/sampling_logp_difference/max": 0.44327640533447266, "sampling/sampling_logp_difference/mean": 0.015674054622650146, "step": 831 }, { "clip_ratio/high_max": 0.032622333616018295, "clip_ratio/high_mean": 0.01782897301018238, "clip_ratio/low_mean": 0.014969296753406525, "clip_ratio/low_min": 0.005868544802069664, "clip_ratio/region_mean": 0.032798267900943756, "entropy": 0.39272159337997437, "epoch": 5.698630136986301, "grad_norm": 1.684474204800372, "kl": 0.854531466960907, "learning_rate": 3.5770547945205476e-07, "loss": 0.0096, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 236.2678680419922, "completions/mean_terminated_length": 236.2678680419922, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.4589460790157318, "epoch": 5.705479452054795, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.673378155783559, "kl": 0.758402943611145, "learning_rate": 3.575342465753424e-07, "loss": 0.0044, "num_tokens": 10327600.0, "reward": 0.6396105885505676, "reward_std": 0.09050043672323227, "rewards/check_gptzero_func/mean": 0.6396105885505676, "rewards/check_gptzero_func/std": 0.35258328914642334, "sampling/importance_sampling_ratio/max": 1.501895546913147, "sampling/importance_sampling_ratio/mean": 1.0001732110977173, "sampling/importance_sampling_ratio/min": 0.4870974123477936, "sampling/sampling_logp_difference/max": 0.7192912101745605, "sampling/sampling_logp_difference/mean": 0.01714707538485527, "step": 833 }, { "clip_ratio/high_max": 0.011900595389306545, "clip_ratio/high_mean": 0.009269162081182003, "clip_ratio/low_mean": 0.007764201145619154, "clip_ratio/low_min": 0.0033057851251214743, "clip_ratio/region_mean": 0.01703336276113987, "entropy": 0.46112170815467834, "epoch": 5.712328767123288, "grad_norm": 1.4670098289508227, "kl": 0.7413718104362488, "learning_rate": 3.5736301369863017e-07, "loss": -0.0054, "step": 834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 195.94644165039062, "completions/mean_terminated_length": 195.94644165039062, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5044463872909546, "epoch": 5.719178082191781, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8136173576795456, "kl": 0.862808883190155, "learning_rate": 3.571917808219178e-07, "loss": -0.0024, "num_tokens": 10343633.0, "reward": 0.785113513469696, "reward_std": 0.041651807725429535, "rewards/check_gptzero_func/mean": 0.7851134538650513, "rewards/check_gptzero_func/std": 0.21589455008506775, "sampling/importance_sampling_ratio/max": 1.4193962812423706, "sampling/importance_sampling_ratio/mean": 0.9992426633834839, "sampling/importance_sampling_ratio/min": 0.130820631980896, "sampling/sampling_logp_difference/max": 2.033928155899048, "sampling/sampling_logp_difference/mean": 0.017176568508148193, "step": 835 }, { "clip_ratio/high_max": 0.01569186896085739, "clip_ratio/high_mean": 0.011690001003444195, "clip_ratio/low_mean": 0.00905235018581152, "clip_ratio/low_min": 0.006586594507098198, "clip_ratio/region_mean": 0.020742351189255714, "entropy": 0.5039147138595581, "epoch": 5.726027397260274, "grad_norm": 1.6797301879691449, "kl": 0.8626928329467773, "learning_rate": 3.5702054794520547e-07, "loss": -0.0131, "step": 836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1113.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 219.98214721679688, "completions/mean_terminated_length": 219.98214721679688, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.517282247543335, "epoch": 5.732876712328768, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.7327054127635209, "kl": 0.833089292049408, "learning_rate": 3.568493150684931e-07, "loss": 0.0173, "num_tokens": 10360590.0, "reward": 0.5749141573905945, "reward_std": 0.13764441013336182, "rewards/check_gptzero_func/mean": 0.5749140977859497, "rewards/check_gptzero_func/std": 0.3754396140575409, "sampling/importance_sampling_ratio/max": 1.4353302717208862, "sampling/importance_sampling_ratio/mean": 0.999679684638977, "sampling/importance_sampling_ratio/min": 0.6527806520462036, "sampling/sampling_logp_difference/max": 0.4265141487121582, "sampling/sampling_logp_difference/mean": 0.019926033914089203, "step": 837 }, { "clip_ratio/high_max": 0.015375517308712006, "clip_ratio/high_mean": 0.010484603233635426, "clip_ratio/low_mean": 0.00980137288570404, "clip_ratio/low_min": 0.007952285930514336, "clip_ratio/region_mean": 0.02028597705066204, "entropy": 0.5169443488121033, "epoch": 5.739726027397261, "grad_norm": 1.5152853842635332, "kl": 0.8339988589286804, "learning_rate": 3.566780821917808e-07, "loss": 0.0067, "step": 838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1071.0, "completions/max_terminated_length": 1071.0, "completions/mean_length": 158.05357360839844, "completions/mean_terminated_length": 158.05357360839844, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.2860850691795349, "epoch": 5.7465753424657535, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.8286094202055108, "kl": 0.8945840001106262, "learning_rate": 3.565068493150685e-07, "loss": 0.0196, "num_tokens": 10374539.0, "reward": 0.5858033895492554, "reward_std": 0.08091267943382263, "rewards/check_gptzero_func/mean": 0.5858033895492554, "rewards/check_gptzero_func/std": 0.41138455271720886, "sampling/importance_sampling_ratio/max": 1.3940688371658325, "sampling/importance_sampling_ratio/mean": 1.0000816583633423, "sampling/importance_sampling_ratio/min": 0.5676589012145996, "sampling/sampling_logp_difference/max": 0.5662345886230469, "sampling/sampling_logp_difference/mean": 0.01264350488781929, "step": 839 }, { "clip_ratio/high_max": 0.022614028304815292, "clip_ratio/high_mean": 0.01294757891446352, "clip_ratio/low_mean": 0.009184266440570354, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.022131845355033875, "entropy": 0.2874505817890167, "epoch": 5.7534246575342465, "grad_norm": 1.4876677520520178, "kl": 0.8858436346054077, "learning_rate": 3.563356164383561e-07, "loss": 0.0099, "step": 840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 961.0, "completions/max_terminated_length": 961.0, "completions/mean_length": 133.30357360839844, "completions/mean_terminated_length": 133.30357360839844, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.3437385857105255, "epoch": 5.760273972602739, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.2815145891785926, "kl": 1.070412278175354, "learning_rate": 3.561643835616438e-07, "loss": 0.0132, "num_tokens": 10386960.0, "reward": 0.7093197107315063, "reward_std": 0.07296457886695862, "rewards/check_gptzero_func/mean": 0.7093196511268616, "rewards/check_gptzero_func/std": 0.3369818925857544, "sampling/importance_sampling_ratio/max": 1.375773310661316, "sampling/importance_sampling_ratio/mean": 0.9997325539588928, "sampling/importance_sampling_ratio/min": 0.6440456509590149, "sampling/sampling_logp_difference/max": 0.4399857521057129, "sampling/sampling_logp_difference/mean": 0.014561336487531662, "step": 841 }, { "clip_ratio/high_max": 0.03312883526086807, "clip_ratio/high_mean": 0.01763216219842434, "clip_ratio/low_mean": 0.014004549942910671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.031636711210012436, "entropy": 0.34495434165000916, "epoch": 5.767123287671232, "grad_norm": 1.684009287286155, "kl": 1.027061104774475, "learning_rate": 3.559931506849315e-07, "loss": 0.0021, "step": 842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1113.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 179.23214721679688, "completions/mean_terminated_length": 179.23214721679688, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.41457313299179077, "epoch": 5.773972602739726, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.2292098785141916, "kl": 0.9591549634933472, "learning_rate": 3.558219178082192e-07, "loss": 0.0156, "num_tokens": 10401993.0, "reward": 0.6365022659301758, "reward_std": 0.11003652960062027, "rewards/check_gptzero_func/mean": 0.6365022659301758, "rewards/check_gptzero_func/std": 0.36520469188690186, "sampling/importance_sampling_ratio/max": 1.4632703065872192, "sampling/importance_sampling_ratio/mean": 1.0002230405807495, "sampling/importance_sampling_ratio/min": 0.6395543217658997, "sampling/sampling_logp_difference/max": 0.4469836950302124, "sampling/sampling_logp_difference/mean": 0.016770843416452408, "step": 843 }, { "clip_ratio/high_max": 0.016423357650637627, "clip_ratio/high_mean": 0.011509600095450878, "clip_ratio/low_mean": 0.010709648951888084, "clip_ratio/low_min": 0.007459756452590227, "clip_ratio/region_mean": 0.022219249978661537, "entropy": 0.4144323170185089, "epoch": 5.780821917808219, "grad_norm": 1.8468560162901322, "kl": 0.8013982772827148, "learning_rate": 3.5565068493150683e-07, "loss": 0.0057, "step": 844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1231.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 149.71429443359375, "completions/mean_terminated_length": 149.71429443359375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.27025488018989563, "epoch": 5.787671232876712, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.9421023122361, "kl": 0.908437192440033, "learning_rate": 3.5547945205479454e-07, "loss": 0.0158, "num_tokens": 10415625.0, "reward": 0.7929981350898743, "reward_std": 0.039683565497398376, "rewards/check_gptzero_func/mean": 0.7929981350898743, "rewards/check_gptzero_func/std": 0.2898584008216858, "sampling/importance_sampling_ratio/max": 1.7164195775985718, "sampling/importance_sampling_ratio/mean": 1.0005683898925781, "sampling/importance_sampling_ratio/min": 0.675563395023346, "sampling/sampling_logp_difference/max": 0.5402405261993408, "sampling/sampling_logp_difference/mean": 0.010747120715677738, "step": 845 }, { "clip_ratio/high_max": 0.022375214844942093, "clip_ratio/high_mean": 0.011614641174674034, "clip_ratio/low_mean": 0.010042442008852959, "clip_ratio/low_min": 0.004032257944345474, "clip_ratio/region_mean": 0.021657083183526993, "entropy": 0.26911085844039917, "epoch": 5.794520547945205, "grad_norm": 1.5690764167279032, "kl": 0.913528561592102, "learning_rate": 3.553082191780822e-07, "loss": 0.0061, "step": 846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1570.0, "completions/max_terminated_length": 1570.0, "completions/mean_length": 139.0178680419922, "completions/mean_terminated_length": 139.0178680419922, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.30213648080825806, "epoch": 5.801369863013699, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.164099917448123, "kl": 1.0319174528121948, "learning_rate": 3.5513698630136984e-07, "loss": 0.0077, "num_tokens": 10428338.0, "reward": 0.7581003904342651, "reward_std": 0.061569493263959885, "rewards/check_gptzero_func/mean": 0.7581003308296204, "rewards/check_gptzero_func/std": 0.309759259223938, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0002268552780151, "sampling/importance_sampling_ratio/min": 0.6389029026031494, "sampling/sampling_logp_difference/max": 0.7341885566711426, "sampling/sampling_logp_difference/mean": 0.01374569907784462, "step": 847 }, { "clip_ratio/high_max": 0.03231017664074898, "clip_ratio/high_mean": 0.016171442344784737, "clip_ratio/low_mean": 0.011367865838110447, "clip_ratio/low_min": 0.0075329565443098545, "clip_ratio/region_mean": 0.02753930725157261, "entropy": 0.3023890554904938, "epoch": 5.808219178082192, "grad_norm": 1.8421803680496343, "kl": 1.0350552797317505, "learning_rate": 3.549657534246575e-07, "loss": -0.0014, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 423.0, "completions/max_terminated_length": 423.0, "completions/mean_length": 80.28572082519531, "completions/mean_terminated_length": 80.28572082519531, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.16600298881530762, "epoch": 5.815068493150685, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.1265954531751405, "kl": 1.144063949584961, "learning_rate": 3.547945205479452e-07, "loss": 0.0433, "num_tokens": 10437620.0, "reward": 0.8061015009880066, "reward_std": 0.08108261972665787, "rewards/check_gptzero_func/mean": 0.8061014413833618, "rewards/check_gptzero_func/std": 0.3029060363769531, "sampling/importance_sampling_ratio/max": 1.4788522720336914, "sampling/importance_sampling_ratio/mean": 1.0000079870224, "sampling/importance_sampling_ratio/min": 0.6475181579589844, "sampling/sampling_logp_difference/max": 0.43460845947265625, "sampling/sampling_logp_difference/mean": 0.00877135805785656, "step": 849 }, { "clip_ratio/high_max": 0.0219123512506485, "clip_ratio/high_mean": 0.015766579657793045, "clip_ratio/low_mean": 0.022354856133461, "clip_ratio/low_min": 0.009107467718422413, "clip_ratio/region_mean": 0.038121435791254044, "entropy": 0.16198398172855377, "epoch": 5.821917808219178, "grad_norm": 1.9103015540896473, "kl": 1.1838372945785522, "learning_rate": 3.5462328767123284e-07, "loss": 0.0383, "step": 850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1207.0, "completions/max_terminated_length": 1207.0, "completions/mean_length": 136.25, "completions/mean_terminated_length": 136.25, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.25533705949783325, "epoch": 5.828767123287671, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.004933436619983, "kl": 0.9277874827384949, "learning_rate": 3.544520547945205e-07, "loss": -0.054, "num_tokens": 10449844.0, "reward": 0.7765617966651917, "reward_std": 0.07194127142429352, "rewards/check_gptzero_func/mean": 0.7765617370605469, "rewards/check_gptzero_func/std": 0.3324601948261261, "sampling/importance_sampling_ratio/max": 1.5278397798538208, "sampling/importance_sampling_ratio/mean": 1.0001311302185059, "sampling/importance_sampling_ratio/min": 0.6782430410385132, "sampling/sampling_logp_difference/max": 0.4238548278808594, "sampling/sampling_logp_difference/mean": 0.01082540862262249, "step": 851 }, { "clip_ratio/high_max": 0.023824067786335945, "clip_ratio/high_mean": 0.013517044484615326, "clip_ratio/low_mean": 0.011725598946213722, "clip_ratio/low_min": 0.004886988550424576, "clip_ratio/region_mean": 0.025242643430829048, "entropy": 0.25404927134513855, "epoch": 5.835616438356165, "grad_norm": 1.6575164098628936, "kl": 0.8803423047065735, "learning_rate": 3.5428082191780825e-07, "loss": -0.0634, "step": 852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 133.9107208251953, "completions/mean_terminated_length": 133.9107208251953, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.3006531298160553, "epoch": 5.842465753424658, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.2539682319210876, "kl": 1.2006803750991821, "learning_rate": 3.541095890410959e-07, "loss": 0.0682, "num_tokens": 10461799.0, "reward": 0.7021389603614807, "reward_std": 0.09069833159446716, "rewards/check_gptzero_func/mean": 0.7021389007568359, "rewards/check_gptzero_func/std": 0.33632150292396545, "sampling/importance_sampling_ratio/max": 1.6195759773254395, "sampling/importance_sampling_ratio/mean": 1.000308871269226, "sampling/importance_sampling_ratio/min": 0.4159544110298157, "sampling/sampling_logp_difference/max": 0.8771796226501465, "sampling/sampling_logp_difference/mean": 0.01336077693849802, "step": 853 }, { "clip_ratio/high_max": 0.020167427137494087, "clip_ratio/high_mean": 0.010953987948596478, "clip_ratio/low_mean": 0.013349688611924648, "clip_ratio/low_min": 0.0026385225355625153, "clip_ratio/region_mean": 0.024303674697875977, "entropy": 0.2998075485229492, "epoch": 5.8493150684931505, "grad_norm": 1.839003808748946, "kl": 1.205292820930481, "learning_rate": 3.5393835616438355e-07, "loss": 0.0564, "step": 854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 213.50001525878906, "completions/mean_terminated_length": 213.50001525878906, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.4102082848548889, "epoch": 5.8561643835616435, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.727501327813627, "kl": 0.648472011089325, "learning_rate": 3.537671232876712e-07, "loss": -0.0038, "num_tokens": 10479067.0, "reward": 0.6879853010177612, "reward_std": 0.04393972083926201, "rewards/check_gptzero_func/mean": 0.6879852414131165, "rewards/check_gptzero_func/std": 0.3229162096977234, "sampling/importance_sampling_ratio/max": 1.5613645315170288, "sampling/importance_sampling_ratio/mean": 1.00020432472229, "sampling/importance_sampling_ratio/min": 0.6657049059867859, "sampling/sampling_logp_difference/max": 0.445560097694397, "sampling/sampling_logp_difference/mean": 0.014335096813738346, "step": 855 }, { "clip_ratio/high_max": 0.015905383974313736, "clip_ratio/high_mean": 0.008739511482417583, "clip_ratio/low_mean": 0.007652223110198975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.016391733661293983, "entropy": 0.4107559323310852, "epoch": 5.863013698630137, "grad_norm": 1.4478128650551165, "kl": 0.6479817628860474, "learning_rate": 3.535958904109589e-07, "loss": -0.0131, "step": 856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 173.62501525878906, "completions/mean_terminated_length": 173.62501525878906, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.32056593894958496, "epoch": 5.86986301369863, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 1.8585525820073256, "kl": 0.8083272576332092, "learning_rate": 3.5342465753424655e-07, "loss": 0.0096, "num_tokens": 10493710.0, "reward": 0.8255680203437805, "reward_std": 0.06959423422813416, "rewards/check_gptzero_func/mean": 0.8255680203437805, "rewards/check_gptzero_func/std": 0.26582980155944824, "sampling/importance_sampling_ratio/max": 1.629921793937683, "sampling/importance_sampling_ratio/mean": 1.000320553779602, "sampling/importance_sampling_ratio/min": 0.6122426390647888, "sampling/sampling_logp_difference/max": 0.49062657356262207, "sampling/sampling_logp_difference/mean": 0.01173738855868578, "step": 857 }, { "clip_ratio/high_max": 0.0190513227134943, "clip_ratio/high_mean": 0.010624485090374947, "clip_ratio/low_mean": 0.00848153792321682, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.019106024876236916, "entropy": 0.32109981775283813, "epoch": 5.876712328767123, "grad_norm": 1.5895184903444988, "kl": 0.7955619692802429, "learning_rate": 3.532534246575342e-07, "loss": -0.0007, "step": 858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 989.0, "completions/max_terminated_length": 989.0, "completions/mean_length": 193.98214721679688, "completions/mean_terminated_length": 193.98214721679688, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5225113034248352, "epoch": 5.883561643835616, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.8478245980070518, "kl": 0.8523861169815063, "learning_rate": 3.530821917808219e-07, "loss": -0.0083, "num_tokens": 10509853.0, "reward": 0.7306429743766785, "reward_std": 0.07992508262395859, "rewards/check_gptzero_func/mean": 0.7306429147720337, "rewards/check_gptzero_func/std": 0.33036983013153076, "sampling/importance_sampling_ratio/max": 1.6160929203033447, "sampling/importance_sampling_ratio/mean": 1.0005815029144287, "sampling/importance_sampling_ratio/min": 0.5704805254936218, "sampling/sampling_logp_difference/max": 0.5612763166427612, "sampling/sampling_logp_difference/mean": 0.017309030517935753, "step": 859 }, { "clip_ratio/high_max": 0.02654867246747017, "clip_ratio/high_mean": 0.01258982252329588, "clip_ratio/low_mean": 0.009362878277897835, "clip_ratio/low_min": 0.002936857519671321, "clip_ratio/region_mean": 0.02195270173251629, "entropy": 0.5221709609031677, "epoch": 5.890410958904109, "grad_norm": 1.5974731151539656, "kl": 0.8547690510749817, "learning_rate": 3.5291095890410956e-07, "loss": -0.0184, "step": 860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2307.0, "completions/max_terminated_length": 2307.0, "completions/mean_length": 215.5357208251953, "completions/mean_terminated_length": 215.5357208251953, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.3558237850666046, "epoch": 5.897260273972603, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.6748758422006964, "kl": 0.6928812861442566, "learning_rate": 3.5273972602739726e-07, "loss": -0.0424, "num_tokens": 10526891.0, "reward": 0.7817510962486267, "reward_std": 0.09615912288427353, "rewards/check_gptzero_func/mean": 0.7817510962486267, "rewards/check_gptzero_func/std": 0.3248593807220459, "sampling/importance_sampling_ratio/max": 1.497294545173645, "sampling/importance_sampling_ratio/mean": 1.0001251697540283, "sampling/importance_sampling_ratio/min": 0.405740886926651, "sampling/sampling_logp_difference/max": 0.9020405411720276, "sampling/sampling_logp_difference/mean": 0.013395369052886963, "step": 861 }, { "clip_ratio/high_max": 0.01440249104052782, "clip_ratio/high_mean": 0.009282814338803291, "clip_ratio/low_mean": 0.00796764437109232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.017250459641218185, "entropy": 0.3561600148677826, "epoch": 5.904109589041096, "grad_norm": 1.3715261134490897, "kl": 0.6923283338546753, "learning_rate": 3.525684931506849e-07, "loss": -0.0519, "step": 862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/max_terminated_length": 311.0, "completions/mean_length": 87.03572082519531, "completions/mean_terminated_length": 87.03572082519531, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.1915399581193924, "epoch": 5.910958904109589, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.186183638287615, "kl": 0.9457807540893555, "learning_rate": 3.523972602739726e-07, "loss": -0.0036, "num_tokens": 10537115.0, "reward": 0.7491239309310913, "reward_std": 0.08918711543083191, "rewards/check_gptzero_func/mean": 0.7491239309310913, "rewards/check_gptzero_func/std": 0.3426686227321625, "sampling/importance_sampling_ratio/max": 1.481220006942749, "sampling/importance_sampling_ratio/mean": 1.0003522634506226, "sampling/importance_sampling_ratio/min": 0.667268693447113, "sampling/sampling_logp_difference/max": 0.40456247329711914, "sampling/sampling_logp_difference/mean": 0.010177713818848133, "step": 863 }, { "clip_ratio/high_max": 0.03692614659667015, "clip_ratio/high_mean": 0.01582692749798298, "clip_ratio/low_mean": 0.01502193696796894, "clip_ratio/low_min": 0.008207933977246284, "clip_ratio/region_mean": 0.03084886446595192, "entropy": 0.1907256543636322, "epoch": 5.917808219178082, "grad_norm": 1.5870301411040924, "kl": 0.9565491080284119, "learning_rate": 3.5222602739726026e-07, "loss": -0.0114, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 815.0, "completions/max_terminated_length": 815.0, "completions/mean_length": 104.85714721679688, "completions/mean_terminated_length": 104.85714721679688, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.33093956112861633, "epoch": 5.924657534246576, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.3230588417387827, "kl": 0.9859779477119446, "learning_rate": 3.520547945205479e-07, "loss": 0.0689, "num_tokens": 10547361.0, "reward": 0.6790100932121277, "reward_std": 0.06751012802124023, "rewards/check_gptzero_func/mean": 0.6790100336074829, "rewards/check_gptzero_func/std": 0.3296887278556824, "sampling/importance_sampling_ratio/max": 1.4706451892852783, "sampling/importance_sampling_ratio/mean": 1.000712275505066, "sampling/importance_sampling_ratio/min": 0.6862483024597168, "sampling/sampling_logp_difference/max": 0.38570117950439453, "sampling/sampling_logp_difference/mean": 0.0143132284283638, "step": 865 }, { "clip_ratio/high_max": 0.02261306531727314, "clip_ratio/high_mean": 0.015393815003335476, "clip_ratio/low_mean": 0.01450303103774786, "clip_ratio/low_min": 0.00699300691485405, "clip_ratio/region_mean": 0.029896846041083336, "entropy": 0.3308982253074646, "epoch": 5.931506849315069, "grad_norm": 1.8366964533839263, "kl": 0.9868079423904419, "learning_rate": 3.5188356164383557e-07, "loss": 0.0572, "step": 866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 165.6428680419922, "completions/mean_terminated_length": 165.6428680419922, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.3719506859779358, "epoch": 5.938356164383562, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 17.925689385172802, "kl": 1.6592843532562256, "learning_rate": 3.5171232876712327e-07, "loss": 0.0139, "num_tokens": 10561323.0, "reward": 0.7147631049156189, "reward_std": 0.07679323107004166, "rewards/check_gptzero_func/mean": 0.7147631049156189, "rewards/check_gptzero_func/std": 0.2826632857322693, "sampling/importance_sampling_ratio/max": 1.6214739084243774, "sampling/importance_sampling_ratio/mean": 1.000375747680664, "sampling/importance_sampling_ratio/min": 0.6535419225692749, "sampling/sampling_logp_difference/max": 0.4833354949951172, "sampling/sampling_logp_difference/mean": 0.014212061651051044, "step": 867 }, { "clip_ratio/high_max": 0.009554140269756317, "clip_ratio/high_mean": 0.004498255904763937, "clip_ratio/low_mean": 0.0024028941988945007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006901150103658438, "entropy": 0.37479543685913086, "epoch": 5.945205479452055, "grad_norm": 1.9806491744715828, "kl": 0.9271469712257385, "learning_rate": 3.515410958904109e-07, "loss": -0.0024, "step": 868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 904.0, "completions/max_terminated_length": 904.0, "completions/mean_length": 156.17857360839844, "completions/mean_terminated_length": 156.17857360839844, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.30639442801475525, "epoch": 5.9520547945205475, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.9682398370174818, "kl": 0.7821012735366821, "learning_rate": 3.513698630136986e-07, "loss": 0.0324, "num_tokens": 10575405.0, "reward": 0.7135013937950134, "reward_std": 0.10201713442802429, "rewards/check_gptzero_func/mean": 0.7135013937950134, "rewards/check_gptzero_func/std": 0.31337666511535645, "sampling/importance_sampling_ratio/max": 1.4461240768432617, "sampling/importance_sampling_ratio/mean": 0.9996841549873352, "sampling/importance_sampling_ratio/min": 0.5069074630737305, "sampling/sampling_logp_difference/max": 0.6794267892837524, "sampling/sampling_logp_difference/mean": 0.01257795188575983, "step": 869 }, { "clip_ratio/high_max": 0.020605280995368958, "clip_ratio/high_mean": 0.010008039884269238, "clip_ratio/low_mean": 0.009676394984126091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.019684433937072754, "entropy": 0.30599117279052734, "epoch": 5.958904109589041, "grad_norm": 1.5606939977771508, "kl": 0.7885946035385132, "learning_rate": 3.511986301369863e-07, "loss": 0.0227, "step": 870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 998.0, "completions/max_terminated_length": 998.0, "completions/mean_length": 121.26786041259766, "completions/mean_terminated_length": 121.26786041259766, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.36187729239463806, "epoch": 5.965753424657534, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.20963018915585, "kl": 1.058180570602417, "learning_rate": 3.51027397260274e-07, "loss": 0.0033, "num_tokens": 10587226.0, "reward": 0.6983538269996643, "reward_std": 0.09074874967336655, "rewards/check_gptzero_func/mean": 0.6983537673950195, "rewards/check_gptzero_func/std": 0.3094756305217743, "sampling/importance_sampling_ratio/max": 1.439820408821106, "sampling/importance_sampling_ratio/mean": 1.0005675554275513, "sampling/importance_sampling_ratio/min": 0.6577995419502258, "sampling/sampling_logp_difference/max": 0.41885507106781006, "sampling/sampling_logp_difference/mean": 0.014917174354195595, "step": 871 }, { "clip_ratio/high_max": 0.022857142612338066, "clip_ratio/high_mean": 0.01340711209923029, "clip_ratio/low_mean": 0.01400061883032322, "clip_ratio/low_min": 0.0031201248057186604, "clip_ratio/region_mean": 0.027407731860876083, "entropy": 0.36329492926597595, "epoch": 5.972602739726027, "grad_norm": 1.7468144536584012, "kl": 1.0594764947891235, "learning_rate": 3.508561643835616e-07, "loss": -0.0078, "step": 872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 176.87501525878906, "completions/mean_terminated_length": 176.87501525878906, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.36949020624160767, "epoch": 5.97945205479452, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.8462149500072058, "kl": 0.8730757832527161, "learning_rate": 3.506849315068493e-07, "loss": 0.007, "num_tokens": 10601987.0, "reward": 0.6955273747444153, "reward_std": 0.07306691259145737, "rewards/check_gptzero_func/mean": 0.6955273747444153, "rewards/check_gptzero_func/std": 0.3529856204986572, "sampling/importance_sampling_ratio/max": 1.5315396785736084, "sampling/importance_sampling_ratio/mean": 0.9999426603317261, "sampling/importance_sampling_ratio/min": 0.688004732131958, "sampling/sampling_logp_difference/max": 0.4262735843658447, "sampling/sampling_logp_difference/mean": 0.014622326008975506, "step": 873 }, { "clip_ratio/high_max": 0.017639512196183205, "clip_ratio/high_mean": 0.01040048897266388, "clip_ratio/low_mean": 0.00975830014795065, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.020158788189291954, "entropy": 0.36890894174575806, "epoch": 5.986301369863014, "grad_norm": 1.5701151350896525, "kl": 0.8757193684577942, "learning_rate": 3.50513698630137e-07, "loss": -0.0029, "step": 874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1128.0, "completions/max_terminated_length": 1128.0, "completions/mean_length": 191.57144165039062, "completions/mean_terminated_length": 191.57144165039062, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.443583220243454, "epoch": 5.993150684931507, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.8779554094230286, "kl": 0.7623099088668823, "learning_rate": 3.5034246575342463e-07, "loss": 0.0149, "num_tokens": 10618337.0, "reward": 0.6174001693725586, "reward_std": 0.11258626729249954, "rewards/check_gptzero_func/mean": 0.6174001693725586, "rewards/check_gptzero_func/std": 0.38138535618782043, "sampling/importance_sampling_ratio/max": 1.8623497486114502, "sampling/importance_sampling_ratio/mean": 0.999743640422821, "sampling/importance_sampling_ratio/min": 0.4056459069252014, "sampling/sampling_logp_difference/max": 0.9022747278213501, "sampling/sampling_logp_difference/mean": 0.016305187717080116, "step": 875 }, { "clip_ratio/high_max": 0.01788908801972866, "clip_ratio/high_mean": 0.011112443171441555, "clip_ratio/low_mean": 0.008262782357633114, "clip_ratio/low_min": 0.00272479560226202, "clip_ratio/region_mean": 0.01937522552907467, "entropy": 0.44552111625671387, "epoch": 6.0, "grad_norm": 1.5714692189876514, "kl": 0.7583974003791809, "learning_rate": 3.501712328767123e-07, "loss": 0.005, "step": 876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1061.0, "completions/max_terminated_length": 1061.0, "completions/mean_length": 120.37500762939453, "completions/mean_terminated_length": 120.37500762939453, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.33235734701156616, "epoch": 6.006849315068493, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.516342992449506, "kl": 1.0802390575408936, "learning_rate": 3.5e-07, "loss": 0.044, "num_tokens": 10630140.0, "reward": 0.7037427425384521, "reward_std": 0.055292412638664246, "rewards/check_gptzero_func/mean": 0.7037426829338074, "rewards/check_gptzero_func/std": 0.3333462178707123, "sampling/importance_sampling_ratio/max": 1.4360237121582031, "sampling/importance_sampling_ratio/mean": 0.9993061423301697, "sampling/importance_sampling_ratio/min": 0.6550171971321106, "sampling/sampling_logp_difference/max": 0.4230937957763672, "sampling/sampling_logp_difference/mean": 0.014649284072220325, "step": 877 }, { "clip_ratio/high_max": 0.02244389057159424, "clip_ratio/high_mean": 0.014379044994711876, "clip_ratio/low_mean": 0.00849713385105133, "clip_ratio/low_min": 0.0033333334140479565, "clip_ratio/region_mean": 0.022876176983118057, "entropy": 0.33584144711494446, "epoch": 6.013698630136986, "grad_norm": 2.146055098619657, "kl": 0.9945135116577148, "learning_rate": 3.498287671232877e-07, "loss": 0.0359, "step": 878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.0, "completions/max_terminated_length": 334.0, "completions/mean_length": 78.73214721679688, "completions/mean_terminated_length": 78.73214721679688, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.1676057130098343, "epoch": 6.02054794520548, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.1950422066153767, "kl": 1.0408211946487427, "learning_rate": 3.4965753424657534e-07, "loss": 0.0339, "num_tokens": 10639045.0, "reward": 0.822107195854187, "reward_std": 0.04096372798085213, "rewards/check_gptzero_func/mean": 0.822107195854187, "rewards/check_gptzero_func/std": 0.30998075008392334, "sampling/importance_sampling_ratio/max": 1.3975985050201416, "sampling/importance_sampling_ratio/mean": 0.9997378587722778, "sampling/importance_sampling_ratio/min": 0.7393302917480469, "sampling/sampling_logp_difference/max": 0.33475542068481445, "sampling/sampling_logp_difference/mean": 0.00689689489081502, "step": 879 }, { "clip_ratio/high_max": 0.0359012708067894, "clip_ratio/high_mean": 0.010562744922935963, "clip_ratio/low_mean": 0.010337241925299168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.02089998498558998, "entropy": 0.16749326884746552, "epoch": 6.027397260273973, "grad_norm": 1.5435656512600493, "kl": 1.047518014907837, "learning_rate": 3.49486301369863e-07, "loss": 0.0238, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 194.71429443359375, "completions/mean_terminated_length": 194.71429443359375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.39441797137260437, "epoch": 6.034246575342466, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.7678797490561886, "kl": 0.7527085542678833, "learning_rate": 3.493150684931507e-07, "loss": -0.0012, "num_tokens": 10655515.0, "reward": 0.7808259129524231, "reward_std": 0.06482900679111481, "rewards/check_gptzero_func/mean": 0.7808259129524231, "rewards/check_gptzero_func/std": 0.31429770588874817, "sampling/importance_sampling_ratio/max": 1.475565791130066, "sampling/importance_sampling_ratio/mean": 1.0000050067901611, "sampling/importance_sampling_ratio/min": 0.6441452503204346, "sampling/sampling_logp_difference/max": 0.439831018447876, "sampling/sampling_logp_difference/mean": 0.013936237432062626, "step": 881 }, { "clip_ratio/high_max": 0.021105527877807617, "clip_ratio/high_mean": 0.010400703176856041, "clip_ratio/low_mean": 0.0065572308376431465, "clip_ratio/low_min": 0.0012180268531665206, "clip_ratio/region_mean": 0.016957934945821762, "entropy": 0.39452192187309265, "epoch": 6.041095890410959, "grad_norm": 1.5141928141115935, "kl": 0.7548874020576477, "learning_rate": 3.4914383561643834e-07, "loss": -0.0115, "step": 882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 223.96429443359375, "completions/mean_terminated_length": 223.96429443359375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.40373721718788147, "epoch": 6.0479452054794525, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7154642582761783, "kl": 0.7956382632255554, "learning_rate": 3.48972602739726e-07, "loss": -0.0172, "num_tokens": 10673433.0, "reward": 0.8048259019851685, "reward_std": 0.06928881257772446, "rewards/check_gptzero_func/mean": 0.8048258423805237, "rewards/check_gptzero_func/std": 0.28177982568740845, "sampling/importance_sampling_ratio/max": 1.4865520000457764, "sampling/importance_sampling_ratio/mean": 0.9998778104782104, "sampling/importance_sampling_ratio/min": 0.6967664361000061, "sampling/sampling_logp_difference/max": 0.39645934104919434, "sampling/sampling_logp_difference/mean": 0.01376378070563078, "step": 883 }, { "clip_ratio/high_max": 0.013721185736358166, "clip_ratio/high_mean": 0.008026039227843285, "clip_ratio/low_mean": 0.005404997151345015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013431036844849586, "entropy": 0.40393373370170593, "epoch": 6.054794520547945, "grad_norm": 1.488581594121984, "kl": 0.7873808741569519, "learning_rate": 3.488013698630137e-07, "loss": -0.0271, "step": 884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 749.0, "completions/max_terminated_length": 749.0, "completions/mean_length": 105.64286041259766, "completions/mean_terminated_length": 105.64286041259766, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.23982922732830048, "epoch": 6.061643835616438, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.19861846416596, "kl": 1.0501596927642822, "learning_rate": 3.4863013698630135e-07, "loss": -0.0669, "num_tokens": 10684239.0, "reward": 0.6066625714302063, "reward_std": 0.07635073363780975, "rewards/check_gptzero_func/mean": 0.6066625714302063, "rewards/check_gptzero_func/std": 0.4128660559654236, "sampling/importance_sampling_ratio/max": 1.407869815826416, "sampling/importance_sampling_ratio/mean": 0.9996272325515747, "sampling/importance_sampling_ratio/min": 0.5277628898620605, "sampling/sampling_logp_difference/max": 0.6391081809997559, "sampling/sampling_logp_difference/mean": 0.010982356034219265, "step": 885 }, { "clip_ratio/high_max": 0.035543765872716904, "clip_ratio/high_mean": 0.015255287289619446, "clip_ratio/low_mean": 0.011882273480296135, "clip_ratio/low_min": 0.002197802299633622, "clip_ratio/region_mean": 0.02713756076991558, "entropy": 0.24126826226711273, "epoch": 6.068493150684931, "grad_norm": 1.6861921301201854, "kl": 1.0263845920562744, "learning_rate": 3.48458904109589e-07, "loss": -0.0768, "step": 886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 993.0, "completions/max_terminated_length": 993.0, "completions/mean_length": 156.9107208251953, "completions/mean_terminated_length": 156.9107208251953, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.33159011602401733, "epoch": 6.075342465753424, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.88472989017855, "kl": 0.834723949432373, "learning_rate": 3.482876712328767e-07, "loss": -0.0272, "num_tokens": 10698102.0, "reward": 0.6980194449424744, "reward_std": 0.08055482804775238, "rewards/check_gptzero_func/mean": 0.6980194449424744, "rewards/check_gptzero_func/std": 0.3564770519733429, "sampling/importance_sampling_ratio/max": 1.60074782371521, "sampling/importance_sampling_ratio/mean": 1.000213384628296, "sampling/importance_sampling_ratio/min": 0.617262601852417, "sampling/sampling_logp_difference/max": 0.48246073722839355, "sampling/sampling_logp_difference/mean": 0.013208949007093906, "step": 887 }, { "clip_ratio/high_max": 0.026224981993436813, "clip_ratio/high_mean": 0.01281664427369833, "clip_ratio/low_mean": 0.00929790548980236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.022114548832178116, "entropy": 0.3328295648097992, "epoch": 6.082191780821918, "grad_norm": 1.5360482626648662, "kl": 0.8318118453025818, "learning_rate": 3.481164383561644e-07, "loss": -0.0372, "step": 888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 184.10714721679688, "completions/mean_terminated_length": 184.10714721679688, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "entropy": 0.4538552761077881, "epoch": 6.089041095890411, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9294327413767522, "kl": 1.0379456281661987, "learning_rate": 3.4794520547945205e-07, "loss": 0.0321, "num_tokens": 10712792.0, "reward": 0.7472462058067322, "reward_std": 0.17013874650001526, "rewards/check_gptzero_func/mean": 0.7472462058067322, "rewards/check_gptzero_func/std": 0.3542933762073517, "sampling/importance_sampling_ratio/max": 1.5289803743362427, "sampling/importance_sampling_ratio/mean": 1.0007449388504028, "sampling/importance_sampling_ratio/min": 0.5943604111671448, "sampling/sampling_logp_difference/max": 0.5202693939208984, "sampling/sampling_logp_difference/mean": 0.016380328685045242, "step": 889 }, { "clip_ratio/high_max": 0.020066889002919197, "clip_ratio/high_mean": 0.010295904241502285, "clip_ratio/low_mean": 0.011501132510602474, "clip_ratio/low_min": 0.0037243948318064213, "clip_ratio/region_mean": 0.02179703488945961, "entropy": 0.45320138335227966, "epoch": 6.095890410958904, "grad_norm": 1.645358039131078, "kl": 1.018196702003479, "learning_rate": 3.477739726027397e-07, "loss": 0.021, "step": 890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1116.0, "completions/max_terminated_length": 1116.0, "completions/mean_length": 166.92857360839844, "completions/mean_terminated_length": 166.92857360839844, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.3336053192615509, "epoch": 6.102739726027397, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.899635123139042, "kl": 0.8609248399734497, "learning_rate": 3.476027397260274e-07, "loss": -0.0444, "num_tokens": 10727068.0, "reward": 0.8183155655860901, "reward_std": 0.057445358484983444, "rewards/check_gptzero_func/mean": 0.8183155059814453, "rewards/check_gptzero_func/std": 0.2861531972885132, "sampling/importance_sampling_ratio/max": 1.495945692062378, "sampling/importance_sampling_ratio/mean": 1.0002155303955078, "sampling/importance_sampling_ratio/min": 0.47694310545921326, "sampling/sampling_logp_difference/max": 0.7403581142425537, "sampling/sampling_logp_difference/mean": 0.012411157600581646, "step": 891 }, { "clip_ratio/high_max": 0.021501585841178894, "clip_ratio/high_mean": 0.009572532959282398, "clip_ratio/low_mean": 0.0076297735795378685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.017202306538820267, "entropy": 0.3340304493904114, "epoch": 6.109589041095891, "grad_norm": 1.5629201045315537, "kl": 0.8548534512519836, "learning_rate": 3.4743150684931506e-07, "loss": -0.0554, "step": 892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 529.0, "completions/max_terminated_length": 529.0, "completions/mean_length": 119.30357360839844, "completions/mean_terminated_length": 119.30357360839844, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.3563700318336487, "epoch": 6.116438356164384, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.1078964628167607, "kl": 0.8780746459960938, "learning_rate": 3.472602739726027e-07, "loss": 0.0255, "num_tokens": 10738681.0, "reward": 0.715900719165802, "reward_std": 0.0982455387711525, "rewards/check_gptzero_func/mean": 0.715900719165802, "rewards/check_gptzero_func/std": 0.34285905957221985, "sampling/importance_sampling_ratio/max": 1.653975009918213, "sampling/importance_sampling_ratio/mean": 1.0004761219024658, "sampling/importance_sampling_ratio/min": 0.6262631416320801, "sampling/sampling_logp_difference/max": 0.5031814575195312, "sampling/sampling_logp_difference/mean": 0.015254421159625053, "step": 893 }, { "clip_ratio/high_max": 0.02166934125125408, "clip_ratio/high_mean": 0.012686685658991337, "clip_ratio/low_mean": 0.013793830759823322, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.02648051455616951, "entropy": 0.35616523027420044, "epoch": 6.123287671232877, "grad_norm": 1.68641472075063, "kl": 0.8768764138221741, "learning_rate": 3.4708904109589036e-07, "loss": 0.0147, "step": 894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 258.3571472167969, "completions/mean_terminated_length": 258.3571472167969, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.5581523776054382, "epoch": 6.13013698630137, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.6300277014131777, "kl": 0.6634880900382996, "learning_rate": 3.4691780821917806e-07, "loss": -0.0427, "num_tokens": 10758415.0, "reward": 0.7188885807991028, "reward_std": 0.09363286197185516, "rewards/check_gptzero_func/mean": 0.7188885807991028, "rewards/check_gptzero_func/std": 0.3277440667152405, "sampling/importance_sampling_ratio/max": 1.4332550764083862, "sampling/importance_sampling_ratio/mean": 1.0007057189941406, "sampling/importance_sampling_ratio/min": 0.4802415370941162, "sampling/sampling_logp_difference/max": 0.7334661483764648, "sampling/sampling_logp_difference/mean": 0.018559183925390244, "step": 895 }, { "clip_ratio/high_max": 0.014115571044385433, "clip_ratio/high_mean": 0.009486923925578594, "clip_ratio/low_mean": 0.0064992234110832214, "clip_ratio/low_min": 0.002955665113404393, "clip_ratio/region_mean": 0.01598614640533924, "entropy": 0.5604439973831177, "epoch": 6.136986301369863, "grad_norm": 1.4382033761583708, "kl": 0.6618337035179138, "learning_rate": 3.4674657534246577e-07, "loss": -0.0519, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 332.39288330078125, "completions/mean_terminated_length": 332.39288330078125, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.4965360462665558, "epoch": 6.1438356164383565, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4911549693701396, "kl": 0.8261586427688599, "learning_rate": 3.465753424657534e-07, "loss": -0.0169, "num_tokens": 10782197.0, "reward": 0.616834819316864, "reward_std": 0.08164849132299423, "rewards/check_gptzero_func/mean": 0.6168347597122192, "rewards/check_gptzero_func/std": 0.3865586221218109, "sampling/importance_sampling_ratio/max": 1.8880658149719238, "sampling/importance_sampling_ratio/mean": 0.9992488026618958, "sampling/importance_sampling_ratio/min": 0.5792080760002136, "sampling/sampling_logp_difference/max": 0.6355528831481934, "sampling/sampling_logp_difference/mean": 0.017688190564513206, "step": 897 }, { "clip_ratio/high_max": 0.0181818176060915, "clip_ratio/high_mean": 0.010796112939715385, "clip_ratio/low_mean": 0.004554455168545246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.015350567176938057, "entropy": 0.496358186006546, "epoch": 6.1506849315068495, "grad_norm": 1.2939252400453534, "kl": 0.7995032072067261, "learning_rate": 3.464041095890411e-07, "loss": -0.0253, "step": 898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1020.0, "completions/max_terminated_length": 1020.0, "completions/mean_length": 147.7678680419922, "completions/mean_terminated_length": 147.7678680419922, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "entropy": 0.40872225165367126, "epoch": 6.157534246575342, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.0361747078261048, "kl": 0.9802113175392151, "learning_rate": 3.4623287671232877e-07, "loss": -0.01, "num_tokens": 10794824.0, "reward": 0.7637414336204529, "reward_std": 0.09032682329416275, "rewards/check_gptzero_func/mean": 0.7637414336204529, "rewards/check_gptzero_func/std": 0.3094952702522278, "sampling/importance_sampling_ratio/max": 1.3549962043762207, "sampling/importance_sampling_ratio/mean": 1.0006290674209595, "sampling/importance_sampling_ratio/min": 0.7182193994522095, "sampling/sampling_logp_difference/max": 0.33098018169403076, "sampling/sampling_logp_difference/mean": 0.014503855258226395, "step": 899 }, { "clip_ratio/high_max": 0.02089378982782364, "clip_ratio/high_mean": 0.010890625417232513, "clip_ratio/low_mean": 0.008530871011316776, "clip_ratio/low_min": 0.0018518518190830946, "clip_ratio/region_mean": 0.019421497359871864, "entropy": 0.4108532965183258, "epoch": 6.164383561643835, "grad_norm": 1.718322747603031, "kl": 0.97312992811203, "learning_rate": 3.460616438356164e-07, "loss": -0.0206, "step": 900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 145.6607208251953, "completions/mean_terminated_length": 145.6607208251953, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.20853158831596375, "epoch": 6.171232876712328, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.0598803735017626, "kl": 0.7365431785583496, "learning_rate": 3.4589041095890407e-07, "loss": 0.0543, "num_tokens": 10807839.0, "reward": 0.8553917407989502, "reward_std": 0.08281786739826202, "rewards/check_gptzero_func/mean": 0.8553916811943054, "rewards/check_gptzero_func/std": 0.2545897364616394, "sampling/importance_sampling_ratio/max": 1.4360480308532715, "sampling/importance_sampling_ratio/mean": 0.9999018907546997, "sampling/importance_sampling_ratio/min": 0.6665824055671692, "sampling/sampling_logp_difference/max": 0.4055914878845215, "sampling/sampling_logp_difference/mean": 0.007572533097118139, "step": 901 }, { "clip_ratio/high_max": 0.0170980766415596, "clip_ratio/high_mean": 0.004875890910625458, "clip_ratio/low_mean": 0.00873070489615202, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013606597669422626, "entropy": 0.20678581297397614, "epoch": 6.178082191780822, "grad_norm": 1.5458704130453569, "kl": 0.7425022721290588, "learning_rate": 3.4571917808219177e-07, "loss": 0.0441, "step": 902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1109.0, "completions/max_terminated_length": 1109.0, "completions/mean_length": 185.83929443359375, "completions/mean_terminated_length": 185.83929443359375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.4826340675354004, "epoch": 6.184931506849315, "frac_reward_zero_std": 0.0, "grad_norm": 1.906419466749839, "kl": 0.7159010767936707, "learning_rate": 3.455479452054794e-07, "loss": -0.0311, "num_tokens": 10822878.0, "reward": 0.6147007346153259, "reward_std": 0.06427379697561264, "rewards/check_gptzero_func/mean": 0.6147007346153259, "rewards/check_gptzero_func/std": 0.40338408946990967, "sampling/importance_sampling_ratio/max": 1.4509845972061157, "sampling/importance_sampling_ratio/mean": 0.9996699094772339, "sampling/importance_sampling_ratio/min": 0.0821480005979538, "sampling/sampling_logp_difference/max": 2.499232769012451, "sampling/sampling_logp_difference/mean": 0.017038825899362564, "step": 903 }, { "clip_ratio/high_max": 0.024025974795222282, "clip_ratio/high_mean": 0.014966830611228943, "clip_ratio/low_mean": 0.010411730967462063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.025378558784723282, "entropy": 0.4832678437232971, "epoch": 6.191780821917808, "grad_norm": 1.689857289164437, "kl": 0.7050771713256836, "learning_rate": 3.453767123287671e-07, "loss": -0.0418, "step": 904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 200.33929443359375, "completions/mean_terminated_length": 200.33929443359375, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.5076590776443481, "epoch": 6.198630136986301, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.861788531102815, "kl": 0.7281795740127563, "learning_rate": 3.4520547945205483e-07, "loss": 0.0241, "num_tokens": 10838631.0, "reward": 0.7107842564582825, "reward_std": 0.11293849349021912, "rewards/check_gptzero_func/mean": 0.7107842564582825, "rewards/check_gptzero_func/std": 0.3497215509414673, "sampling/importance_sampling_ratio/max": 1.6157732009887695, "sampling/importance_sampling_ratio/mean": 0.9998043775558472, "sampling/importance_sampling_ratio/min": 0.6855385303497314, "sampling/sampling_logp_difference/max": 0.4798135757446289, "sampling/sampling_logp_difference/mean": 0.01767391711473465, "step": 905 }, { "clip_ratio/high_max": 0.025714285671710968, "clip_ratio/high_mean": 0.014865093864500523, "clip_ratio/low_mean": 0.010171613655984402, "clip_ratio/low_min": 0.007015650160610676, "clip_ratio/region_mean": 0.025036707520484924, "entropy": 0.5083141922950745, "epoch": 6.205479452054795, "grad_norm": 1.5800239728238263, "kl": 0.7388750910758972, "learning_rate": 3.450342465753425e-07, "loss": 0.0138, "step": 906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1130.0, "completions/max_terminated_length": 1130.0, "completions/mean_length": 129.83929443359375, "completions/mean_terminated_length": 129.83929443359375, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 0.29440760612487793, "epoch": 6.212328767123288, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 1.9777086714156826, "kl": 0.8143724799156189, "learning_rate": 3.4486301369863013e-07, "loss": 0.011, "num_tokens": 10850764.0, "reward": 0.83638995885849, "reward_std": 0.0959247574210167, "rewards/check_gptzero_func/mean": 0.83638995885849, "rewards/check_gptzero_func/std": 0.28627854585647583, "sampling/importance_sampling_ratio/max": 1.3477427959442139, "sampling/importance_sampling_ratio/mean": 0.9998083114624023, "sampling/importance_sampling_ratio/min": 0.7084723114967346, "sampling/sampling_logp_difference/max": 0.34464430809020996, "sampling/sampling_logp_difference/mean": 0.010750751942396164, "step": 907 }, { "clip_ratio/high_max": 0.018518518656492233, "clip_ratio/high_mean": 0.008965907618403435, "clip_ratio/low_mean": 0.01256160531193018, "clip_ratio/low_min": 0.008865248411893845, "clip_ratio/region_mean": 0.02152751199901104, "entropy": 0.29390329122543335, "epoch": 6.219178082191781, "grad_norm": 1.5975575963257933, "kl": 0.8144002556800842, "learning_rate": 3.446917808219178e-07, "loss": 0.0005, "step": 908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1131.0, "completions/max_terminated_length": 1131.0, "completions/mean_length": 130.19644165039062, "completions/mean_terminated_length": 130.19644165039062, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.2270555943250656, "epoch": 6.226027397260274, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.0073855682777344, "kl": 1.0630282163619995, "learning_rate": 3.445205479452055e-07, "loss": -0.0003, "num_tokens": 10862521.0, "reward": 0.866646945476532, "reward_std": 0.03888774663209915, "rewards/check_gptzero_func/mean": 0.8666468858718872, "rewards/check_gptzero_func/std": 0.2818998396396637, "sampling/importance_sampling_ratio/max": 1.3238823413848877, "sampling/importance_sampling_ratio/mean": 1.000073790550232, "sampling/importance_sampling_ratio/min": 0.6401443481445312, "sampling/sampling_logp_difference/max": 0.4460616111755371, "sampling/sampling_logp_difference/mean": 0.00837106816470623, "step": 909 }, { "clip_ratio/high_max": 0.020091325044631958, "clip_ratio/high_mean": 0.0077792322263121605, "clip_ratio/low_mean": 0.008815460838377476, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.016594693064689636, "entropy": 0.22565947473049164, "epoch": 6.232876712328767, "grad_norm": 1.5951573685713807, "kl": 1.0644744634628296, "learning_rate": 3.4434931506849314e-07, "loss": -0.0104, "step": 910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 113.37500762939453, "completions/mean_terminated_length": 113.37500762939453, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.2176952362060547, "epoch": 6.239726027397261, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.211564421623582, "kl": 0.9654788970947266, "learning_rate": 3.441780821917808e-07, "loss": 0.0147, "num_tokens": 10873690.0, "reward": 0.7782890200614929, "reward_std": 0.045802365988492966, "rewards/check_gptzero_func/mean": 0.7782889604568481, "rewards/check_gptzero_func/std": 0.3425005376338959, "sampling/importance_sampling_ratio/max": 1.348062515258789, "sampling/importance_sampling_ratio/mean": 0.9995940327644348, "sampling/importance_sampling_ratio/min": 0.6280799508094788, "sampling/sampling_logp_difference/max": 0.465087890625, "sampling/sampling_logp_difference/mean": 0.008296932093799114, "step": 911 }, { "clip_ratio/high_max": 0.03218390792608261, "clip_ratio/high_mean": 0.010196663439273834, "clip_ratio/low_mean": 0.010029667988419533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.020226331427693367, "entropy": 0.2173781543970108, "epoch": 6.2465753424657535, "grad_norm": 1.7160021907676835, "kl": 0.966776430606842, "learning_rate": 3.4400684931506844e-07, "loss": 0.0061, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1070.0, "completions/max_terminated_length": 1070.0, "completions/mean_length": 166.82144165039062, "completions/mean_terminated_length": 166.82144165039062, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.36365827918052673, "epoch": 6.2534246575342465, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 1.9843739532726266, "kl": 0.9309769868850708, "learning_rate": 3.4383561643835614e-07, "loss": 0.0193, "num_tokens": 10887958.0, "reward": 0.7462854981422424, "reward_std": 0.09488038718700409, "rewards/check_gptzero_func/mean": 0.7462854385375977, "rewards/check_gptzero_func/std": 0.3627999424934387, "sampling/importance_sampling_ratio/max": 1.4486597776412964, "sampling/importance_sampling_ratio/mean": 1.0011768341064453, "sampling/importance_sampling_ratio/min": 0.6223042607307434, "sampling/sampling_logp_difference/max": 0.47432613372802734, "sampling/sampling_logp_difference/mean": 0.014167562127113342, "step": 913 }, { "clip_ratio/high_max": 0.017717717215418816, "clip_ratio/high_mean": 0.009298833087086678, "clip_ratio/low_mean": 0.007026708219200373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.016325540840625763, "entropy": 0.36522918939590454, "epoch": 6.260273972602739, "grad_norm": 1.6041570673507541, "kl": 0.9061369299888611, "learning_rate": 3.4366438356164384e-07, "loss": 0.0092, "step": 914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 143.57144165039062, "completions/mean_terminated_length": 143.57144165039062, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.37258583307266235, "epoch": 6.267123287671233, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9482688956883745, "kl": 0.853862464427948, "learning_rate": 3.434931506849315e-07, "loss": 0.0315, "num_tokens": 10901090.0, "reward": 0.7429117560386658, "reward_std": 0.05738169327378273, "rewards/check_gptzero_func/mean": 0.7429117560386658, "rewards/check_gptzero_func/std": 0.35476067662239075, "sampling/importance_sampling_ratio/max": 1.4129400253295898, "sampling/importance_sampling_ratio/mean": 1.0006012916564941, "sampling/importance_sampling_ratio/min": 0.6457658410072327, "sampling/sampling_logp_difference/max": 0.437318354845047, "sampling/sampling_logp_difference/mean": 0.013698163442313671, "step": 915 }, { "clip_ratio/high_max": 0.018001800402998924, "clip_ratio/high_mean": 0.010360995307564735, "clip_ratio/low_mean": 0.012081168591976166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0224421638995409, "entropy": 0.3728906810283661, "epoch": 6.273972602739726, "grad_norm": 1.673524601418274, "kl": 0.8626027703285217, "learning_rate": 3.433219178082192e-07, "loss": 0.0213, "step": 916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1702.0, "completions/max_terminated_length": 1702.0, "completions/mean_length": 193.07144165039062, "completions/mean_terminated_length": 193.07144165039062, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.41012755036354065, "epoch": 6.280821917808219, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.911648876085436, "kl": 0.8881774544715881, "learning_rate": 3.4315068493150685e-07, "loss": -0.0352, "num_tokens": 10916682.0, "reward": 0.8659796714782715, "reward_std": 0.08249437808990479, "rewards/check_gptzero_func/mean": 0.8659796118736267, "rewards/check_gptzero_func/std": 0.22655028104782104, "sampling/importance_sampling_ratio/max": 1.4734866619110107, "sampling/importance_sampling_ratio/mean": 1.0001381635665894, "sampling/importance_sampling_ratio/min": 0.6791829466819763, "sampling/sampling_logp_difference/max": 0.3876314163208008, "sampling/sampling_logp_difference/mean": 0.01328125037252903, "step": 917 }, { "clip_ratio/high_max": 0.020479654893279076, "clip_ratio/high_mean": 0.008895184844732285, "clip_ratio/low_mean": 0.006634850520640612, "clip_ratio/low_min": 0.0015384615398943424, "clip_ratio/region_mean": 0.015530034899711609, "entropy": 0.4097318947315216, "epoch": 6.287671232876712, "grad_norm": 1.5846033385091625, "kl": 0.8677541613578796, "learning_rate": 3.429794520547945e-07, "loss": -0.0456, "step": 918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1031.0, "completions/max_terminated_length": 1031.0, "completions/mean_length": 118.5714340209961, "completions/mean_terminated_length": 118.5714340209961, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "entropy": 0.3529672920703888, "epoch": 6.294520547945205, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 2.1596842188018686, "kl": 0.7924655675888062, "learning_rate": 3.4280821917808215e-07, "loss": 0.0694, "num_tokens": 10927636.0, "reward": 0.7606658339500427, "reward_std": 0.10882039368152618, "rewards/check_gptzero_func/mean": 0.7606658339500427, "rewards/check_gptzero_func/std": 0.36026692390441895, "sampling/importance_sampling_ratio/max": 1.3066256046295166, "sampling/importance_sampling_ratio/mean": 0.9996699094772339, "sampling/importance_sampling_ratio/min": 0.6771495342254639, "sampling/sampling_logp_difference/max": 0.38986313343048096, "sampling/sampling_logp_difference/mean": 0.012935904785990715, "step": 919 }, { "clip_ratio/high_max": 0.02154398523271084, "clip_ratio/high_mean": 0.014018439687788486, "clip_ratio/low_mean": 0.008642853237688541, "clip_ratio/low_min": 0.0019801980815827847, "clip_ratio/region_mean": 0.022661294788122177, "entropy": 0.354063481092453, "epoch": 6.301369863013699, "grad_norm": 1.7719057251638342, "kl": 0.7860745787620544, "learning_rate": 3.4263698630136985e-07, "loss": 0.0583, "step": 920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 151.6428680419922, "completions/mean_terminated_length": 151.6428680419922, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.3287952244281769, "epoch": 6.308219178082192, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9270376023536138, "kl": 0.9130885004997253, "learning_rate": 3.424657534246575e-07, "loss": -0.0128, "num_tokens": 10940862.0, "reward": 0.7729828357696533, "reward_std": 0.06718891859054565, "rewards/check_gptzero_func/mean": 0.7729827165603638, "rewards/check_gptzero_func/std": 0.369841605424881, "sampling/importance_sampling_ratio/max": 1.5114452838897705, "sampling/importance_sampling_ratio/mean": 1.000636339187622, "sampling/importance_sampling_ratio/min": 0.6328771710395813, "sampling/sampling_logp_difference/max": 0.45747900009155273, "sampling/sampling_logp_difference/mean": 0.012095211073756218, "step": 921 }, { "clip_ratio/high_max": 0.018691588193178177, "clip_ratio/high_mean": 0.012747718021273613, "clip_ratio/low_mean": 0.007938407361507416, "clip_ratio/low_min": 0.0015552099794149399, "clip_ratio/region_mean": 0.02068612352013588, "entropy": 0.330637663602829, "epoch": 6.315068493150685, "grad_norm": 1.6417685133250761, "kl": 0.9003267288208008, "learning_rate": 3.4229452054794515e-07, "loss": -0.0232, "step": 922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 134.3928680419922, "completions/mean_terminated_length": 134.3928680419922, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.31461378931999207, "epoch": 6.321917808219178, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.9926816176940438, "kl": 0.8603158593177795, "learning_rate": 3.421232876712329e-07, "loss": 0.0054, "num_tokens": 10953204.0, "reward": 0.7646821141242981, "reward_std": 0.0940299779176712, "rewards/check_gptzero_func/mean": 0.7646821141242981, "rewards/check_gptzero_func/std": 0.3221418857574463, "sampling/importance_sampling_ratio/max": 1.5077953338623047, "sampling/importance_sampling_ratio/mean": 0.9998186230659485, "sampling/importance_sampling_ratio/min": 0.5676485896110535, "sampling/sampling_logp_difference/max": 0.5662527084350586, "sampling/sampling_logp_difference/mean": 0.011252665892243385, "step": 923 }, { "clip_ratio/high_max": 0.022058824077248573, "clip_ratio/high_mean": 0.012786856852471828, "clip_ratio/low_mean": 0.008151985704898834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.020938843488693237, "entropy": 0.31525906920433044, "epoch": 6.328767123287671, "grad_norm": 1.579877159512306, "kl": 0.8430331945419312, "learning_rate": 3.4195205479452056e-07, "loss": -0.0058, "step": 924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 967.0, "completions/max_terminated_length": 967.0, "completions/mean_length": 135.3928680419922, "completions/mean_terminated_length": 135.3928680419922, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.3506886959075928, "epoch": 6.335616438356165, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9685489476105547, "kl": 0.8526949286460876, "learning_rate": 3.417808219178082e-07, "loss": -0.02, "num_tokens": 10965482.0, "reward": 0.8115677237510681, "reward_std": 0.05216076597571373, "rewards/check_gptzero_func/mean": 0.8115677833557129, "rewards/check_gptzero_func/std": 0.33188703656196594, "sampling/importance_sampling_ratio/max": 1.305165410041809, "sampling/importance_sampling_ratio/mean": 0.9994789958000183, "sampling/importance_sampling_ratio/min": 0.6446672677993774, "sampling/sampling_logp_difference/max": 0.4390209913253784, "sampling/sampling_logp_difference/mean": 0.012238330207765102, "step": 925 }, { "clip_ratio/high_max": 0.01647183857858181, "clip_ratio/high_mean": 0.010795334354043007, "clip_ratio/low_mean": 0.006831563077867031, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.017626898363232613, "entropy": 0.3524510860443115, "epoch": 6.342465753424658, "grad_norm": 1.7105104973614391, "kl": 0.8535172343254089, "learning_rate": 3.4160958904109586e-07, "loss": -0.0306, "step": 926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 245.32144165039062, "completions/mean_terminated_length": 245.32144165039062, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "entropy": 0.5577754378318787, "epoch": 6.3493150684931505, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 2.0246117857229238, "kl": 0.933309018611908, "learning_rate": 3.4143835616438356e-07, "loss": 0.0, "num_tokens": 10984010.0, "reward": 0.6725048422813416, "reward_std": 0.08340457081794739, "rewards/check_gptzero_func/mean": 0.6725048422813416, "rewards/check_gptzero_func/std": 0.3645648956298828, "sampling/importance_sampling_ratio/max": 1.8292597532272339, "sampling/importance_sampling_ratio/mean": 0.9998714327812195, "sampling/importance_sampling_ratio/min": 0.6967576146125793, "sampling/sampling_logp_difference/max": 0.6039113998413086, "sampling/sampling_logp_difference/mean": 0.019356384873390198, "step": 927 }, { "clip_ratio/high_max": 0.023607175797224045, "clip_ratio/high_mean": 0.013473953120410442, "clip_ratio/low_mean": 0.005830774549394846, "clip_ratio/low_min": 0.003071252955123782, "clip_ratio/region_mean": 0.019304728135466576, "entropy": 0.5609920620918274, "epoch": 6.3561643835616435, "grad_norm": 1.5731494586829133, "kl": 0.7095223069190979, "learning_rate": 3.412671232876712e-07, "loss": -0.0098, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 166.0, "completions/mean_terminated_length": 166.0, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.27684617042541504, "epoch": 6.363013698630137, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.9494954663747184, "kl": 0.9613848924636841, "learning_rate": 3.4109589041095886e-07, "loss": 0.022, "num_tokens": 10997644.0, "reward": 0.7001769542694092, "reward_std": 0.051837481558322906, "rewards/check_gptzero_func/mean": 0.7001768946647644, "rewards/check_gptzero_func/std": 0.35814952850341797, "sampling/importance_sampling_ratio/max": 1.5430892705917358, "sampling/importance_sampling_ratio/mean": 1.000466227531433, "sampling/importance_sampling_ratio/min": 0.6262689828872681, "sampling/sampling_logp_difference/max": 0.467975378036499, "sampling/sampling_logp_difference/mean": 0.011716471053659916, "step": 929 }, { "clip_ratio/high_max": 0.01697239838540554, "clip_ratio/high_mean": 0.009015413001179695, "clip_ratio/low_mean": 0.009533354081213474, "clip_ratio/low_min": 0.00615384615957737, "clip_ratio/region_mean": 0.018548768013715744, "entropy": 0.2750854790210724, "epoch": 6.36986301369863, "grad_norm": 1.593477912366146, "kl": 0.9673056602478027, "learning_rate": 3.4092465753424657e-07, "loss": 0.0119, "step": 930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 181.6607208251953, "completions/mean_terminated_length": 181.6607208251953, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "entropy": 0.47221142053604126, "epoch": 6.376712328767123, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9898090326969862, "kl": 0.8942444920539856, "learning_rate": 3.407534246575342e-07, "loss": -0.0193, "num_tokens": 11012657.0, "reward": 0.6856136918067932, "reward_std": 0.09549658745527267, "rewards/check_gptzero_func/mean": 0.6856136322021484, "rewards/check_gptzero_func/std": 0.3767335116863251, "sampling/importance_sampling_ratio/max": 1.5305943489074707, "sampling/importance_sampling_ratio/mean": 0.9987404942512512, "sampling/importance_sampling_ratio/min": 0.6751255393028259, "sampling/sampling_logp_difference/max": 0.4256560802459717, "sampling/sampling_logp_difference/mean": 0.016031572595238686, "step": 931 }, { "clip_ratio/high_max": 0.01722319796681404, "clip_ratio/high_mean": 0.010445788502693176, "clip_ratio/low_mean": 0.009679170325398445, "clip_ratio/low_min": 0.007180469110608101, "clip_ratio/region_mean": 0.02012495882809162, "entropy": 0.474283903837204, "epoch": 6.383561643835616, "grad_norm": 1.7080865582404128, "kl": 0.8736454248428345, "learning_rate": 3.405821917808219e-07, "loss": -0.0302, "step": 932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 245.7678680419922, "completions/mean_terminated_length": 245.7678680419922, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.41717103123664856, "epoch": 6.390410958904109, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.6789257690761772, "kl": 0.7579818367958069, "learning_rate": 3.4041095890410957e-07, "loss": 0.0164, "num_tokens": 11031194.0, "reward": 0.7218555808067322, "reward_std": 0.06581243127584457, "rewards/check_gptzero_func/mean": 0.7218555808067322, "rewards/check_gptzero_func/std": 0.3161913752555847, "sampling/importance_sampling_ratio/max": 1.754245638847351, "sampling/importance_sampling_ratio/mean": 0.999884307384491, "sampling/importance_sampling_ratio/min": 0.6837969422340393, "sampling/sampling_logp_difference/max": 0.5620388984680176, "sampling/sampling_logp_difference/mean": 0.014865697361528873, "step": 933 }, { "clip_ratio/high_max": 0.018518518656492233, "clip_ratio/high_mean": 0.00847845058888197, "clip_ratio/low_mean": 0.005737608764320612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.014216058887541294, "entropy": 0.4179310202598572, "epoch": 6.397260273972603, "grad_norm": 1.4742438930114252, "kl": 0.7481013536453247, "learning_rate": 3.402397260273973e-07, "loss": 0.0068, "step": 934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 827.0, "completions/max_terminated_length": 827.0, "completions/mean_length": 145.9107208251953, "completions/mean_terminated_length": 145.9107208251953, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.3332368731498718, "epoch": 6.404109589041096, "frac_reward_zero_std": 0.25, "grad_norm": 2.012275809683051, "kl": 0.9882373809814453, "learning_rate": 3.400684931506849e-07, "loss": -0.0269, "num_tokens": 11044337.0, "reward": 0.8269354104995728, "reward_std": 0.050469763576984406, "rewards/check_gptzero_func/mean": 0.826935350894928, "rewards/check_gptzero_func/std": 0.31254246830940247, "sampling/importance_sampling_ratio/max": 1.4421756267547607, "sampling/importance_sampling_ratio/mean": 0.9998273849487305, "sampling/importance_sampling_ratio/min": 0.6839818954467773, "sampling/sampling_logp_difference/max": 0.37982386350631714, "sampling/sampling_logp_difference/mean": 0.011645453050732613, "step": 935 }, { "clip_ratio/high_max": 0.021381579339504242, "clip_ratio/high_mean": 0.011116398498415947, "clip_ratio/low_mean": 0.007107918616384268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.018224315717816353, "entropy": 0.33264273405075073, "epoch": 6.410958904109589, "grad_norm": 1.6649370581216405, "kl": 0.9866158366203308, "learning_rate": 3.398972602739726e-07, "loss": -0.0375, "step": 936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 319.125, "completions/mean_terminated_length": 319.125, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.5503525137901306, "epoch": 6.417808219178082, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.4434597644099787, "kl": 0.5719168782234192, "learning_rate": 3.397260273972602e-07, "loss": 0.0254, "num_tokens": 11066632.0, "reward": 0.7409278750419617, "reward_std": 0.04434830695390701, "rewards/check_gptzero_func/mean": 0.7409278154373169, "rewards/check_gptzero_func/std": 0.31635287404060364, "sampling/importance_sampling_ratio/max": 1.507166862487793, "sampling/importance_sampling_ratio/mean": 1.0003505945205688, "sampling/importance_sampling_ratio/min": 0.621850848197937, "sampling/sampling_logp_difference/max": 0.4750549793243408, "sampling/sampling_logp_difference/mean": 0.01910380832850933, "step": 937 }, { "clip_ratio/high_max": 0.012687427923083305, "clip_ratio/high_mean": 0.00881450530141592, "clip_ratio/low_mean": 0.004827931523323059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01364243682473898, "entropy": 0.5500964522361755, "epoch": 6.424657534246576, "grad_norm": 1.2729063649755858, "kl": 0.5692769289016724, "learning_rate": 3.3955479452054793e-07, "loss": 0.0165, "step": 938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 873.0, "completions/max_terminated_length": 873.0, "completions/mean_length": 142.46429443359375, "completions/mean_terminated_length": 142.46429443359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.39358076453208923, "epoch": 6.431506849315069, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9949029247404555, "kl": 0.8212004899978638, "learning_rate": 3.393835616438356e-07, "loss": -0.0109, "num_tokens": 11079762.0, "reward": 0.7342345714569092, "reward_std": 0.1404537558555603, "rewards/check_gptzero_func/mean": 0.7342345118522644, "rewards/check_gptzero_func/std": 0.37902897596359253, "sampling/importance_sampling_ratio/max": 1.6598389148712158, "sampling/importance_sampling_ratio/mean": 1.0009713172912598, "sampling/importance_sampling_ratio/min": 0.6402227878570557, "sampling/sampling_logp_difference/max": 0.5067205429077148, "sampling/sampling_logp_difference/mean": 0.013360615819692612, "step": 939 }, { "clip_ratio/high_max": 0.018922852352261543, "clip_ratio/high_mean": 0.01068042404949665, "clip_ratio/low_mean": 0.007066923659294844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.017747346311807632, "entropy": 0.3943028450012207, "epoch": 6.438356164383562, "grad_norm": 1.6346123078697405, "kl": 0.819355309009552, "learning_rate": 3.3921232876712323e-07, "loss": -0.0211, "step": 940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1805.0, "completions/max_terminated_length": 1805.0, "completions/mean_length": 216.5178680419922, "completions/mean_terminated_length": 216.5178680419922, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.4491299092769623, "epoch": 6.445205479452055, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.6587098214176001, "kl": 0.7326755523681641, "learning_rate": 3.39041095890411e-07, "loss": -0.0132, "num_tokens": 11096839.0, "reward": 0.7567785978317261, "reward_std": 0.07725319266319275, "rewards/check_gptzero_func/mean": 0.7567785978317261, "rewards/check_gptzero_func/std": 0.34642350673675537, "sampling/importance_sampling_ratio/max": 1.4757969379425049, "sampling/importance_sampling_ratio/mean": 0.9999951720237732, "sampling/importance_sampling_ratio/min": 0.7305793762207031, "sampling/sampling_logp_difference/max": 0.3891981840133667, "sampling/sampling_logp_difference/mean": 0.015011299401521683, "step": 941 }, { "clip_ratio/high_max": 0.019455252215266228, "clip_ratio/high_mean": 0.007727664429694414, "clip_ratio/low_mean": 0.006788528058677912, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.014516194351017475, "entropy": 0.44864708185195923, "epoch": 6.4520547945205475, "grad_norm": 1.4558101780722656, "kl": 0.7377511858940125, "learning_rate": 3.3886986301369864e-07, "loss": -0.0229, "step": 942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 988.0, "completions/max_terminated_length": 988.0, "completions/mean_length": 135.21429443359375, "completions/mean_terminated_length": 135.21429443359375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.22754092514514923, "epoch": 6.458904109589041, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.9016426554032977, "kl": 0.6576730608940125, "learning_rate": 3.386986301369863e-07, "loss": 0.0328, "num_tokens": 11109469.0, "reward": 0.8781057000160217, "reward_std": 0.032473813742399216, "rewards/check_gptzero_func/mean": 0.878105640411377, "rewards/check_gptzero_func/std": 0.26461920142173767, "sampling/importance_sampling_ratio/max": 1.3756710290908813, "sampling/importance_sampling_ratio/mean": 0.999568521976471, "sampling/importance_sampling_ratio/min": 0.6214653849601746, "sampling/sampling_logp_difference/max": 0.475675106048584, "sampling/sampling_logp_difference/mean": 0.008743003010749817, "step": 943 }, { "clip_ratio/high_max": 0.015625, "clip_ratio/high_mean": 0.007932969368994236, "clip_ratio/low_mean": 0.00576524343341589, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013698212802410126, "entropy": 0.22742803394794464, "epoch": 6.465753424657534, "grad_norm": 1.5156292624597207, "kl": 0.6167407631874084, "learning_rate": 3.3852739726027394e-07, "loss": 0.0238, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1155.0, "completions/max_terminated_length": 1155.0, "completions/mean_length": 158.5357208251953, "completions/mean_terminated_length": 158.5357208251953, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.2966555953025818, "epoch": 6.472602739726027, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 1.9682209698826802, "kl": 0.9518699049949646, "learning_rate": 3.3835616438356164e-07, "loss": 0.0296, "num_tokens": 11122813.0, "reward": 0.8341465592384338, "reward_std": 0.06737395375967026, "rewards/check_gptzero_func/mean": 0.8341464996337891, "rewards/check_gptzero_func/std": 0.2793629765510559, "sampling/importance_sampling_ratio/max": 1.4703515768051147, "sampling/importance_sampling_ratio/mean": 0.9998587965965271, "sampling/importance_sampling_ratio/min": 0.6776816248893738, "sampling/sampling_logp_difference/max": 0.38907766342163086, "sampling/sampling_logp_difference/mean": 0.010359250009059906, "step": 945 }, { "clip_ratio/high_max": 0.01804320141673088, "clip_ratio/high_mean": 0.006668349262326956, "clip_ratio/low_mean": 0.0060646734200417995, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012733022682368755, "entropy": 0.29694125056266785, "epoch": 6.47945205479452, "grad_norm": 1.6555788553873052, "kl": 0.8846732378005981, "learning_rate": 3.381849315068493e-07, "loss": 0.0187, "step": 946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 166.10714721679688, "completions/mean_terminated_length": 166.10714721679688, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "entropy": 0.33742591738700867, "epoch": 6.486301369863014, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7923794010837986, "kl": 0.9052896499633789, "learning_rate": 3.3801369863013694e-07, "loss": 0.0101, "num_tokens": 11137499.0, "reward": 0.8151586651802063, "reward_std": 0.1539134979248047, "rewards/check_gptzero_func/mean": 0.8151586651802063, "rewards/check_gptzero_func/std": 0.3054094612598419, "sampling/importance_sampling_ratio/max": 1.5370632410049438, "sampling/importance_sampling_ratio/mean": 0.9998937249183655, "sampling/importance_sampling_ratio/min": 0.6723102331161499, "sampling/sampling_logp_difference/max": 0.42987358570098877, "sampling/sampling_logp_difference/mean": 0.012652585282921791, "step": 947 }, { "clip_ratio/high_max": 0.01808176189661026, "clip_ratio/high_mean": 0.008503062650561333, "clip_ratio/low_mean": 0.009762121364474297, "clip_ratio/low_min": 0.004081632476300001, "clip_ratio/region_mean": 0.01826518215239048, "entropy": 0.33807602524757385, "epoch": 6.493150684931507, "grad_norm": 1.5537476528273955, "kl": 0.8829171061515808, "learning_rate": 3.3784246575342464e-07, "loss": 0.0007, "step": 948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 971.0, "completions/max_terminated_length": 971.0, "completions/mean_length": 123.85714721679688, "completions/mean_terminated_length": 123.85714721679688, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.3015020191669464, "epoch": 6.5, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.4402663055420404, "kl": 1.301487684249878, "learning_rate": 3.376712328767123e-07, "loss": 0.0357, "num_tokens": 11149421.0, "reward": 0.8083322048187256, "reward_std": 0.07090790569782257, "rewards/check_gptzero_func/mean": 0.8083321452140808, "rewards/check_gptzero_func/std": 0.31469836831092834, "sampling/importance_sampling_ratio/max": 1.5110396146774292, "sampling/importance_sampling_ratio/mean": 0.9998853802680969, "sampling/importance_sampling_ratio/min": 0.7122740745544434, "sampling/sampling_logp_difference/max": 0.4127979278564453, "sampling/sampling_logp_difference/mean": 0.010873516090214252, "step": 949 }, { "clip_ratio/high_max": 0.026539277285337448, "clip_ratio/high_mean": 0.012344683520495892, "clip_ratio/low_mean": 0.007291010115295649, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.019635694101452827, "entropy": 0.30365118384361267, "epoch": 6.506849315068493, "grad_norm": 1.780859697833698, "kl": 1.1168102025985718, "learning_rate": 3.375e-07, "loss": 0.0237, "step": 950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/max_terminated_length": 207.0, "completions/mean_length": 69.42857360839844, "completions/mean_terminated_length": 69.42857360839844, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.05989638715982437, "epoch": 6.513698630136986, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 1.2868345770043048, "kl": 0.966456413269043, "learning_rate": 3.3732876712328765e-07, "loss": 0.0044, "num_tokens": 11158193.0, "reward": 0.9463000297546387, "reward_std": 0.00616389699280262, "rewards/check_gptzero_func/mean": 0.9462999701499939, "rewards/check_gptzero_func/std": 0.18214558064937592, "sampling/importance_sampling_ratio/max": 1.26508629322052, "sampling/importance_sampling_ratio/mean": 1.000285267829895, "sampling/importance_sampling_ratio/min": 0.7725916504859924, "sampling/sampling_logp_difference/max": 0.25800466537475586, "sampling/sampling_logp_difference/mean": 0.0025870916433632374, "step": 951 }, { "clip_ratio/high_max": 0.026737967506051064, "clip_ratio/high_mean": 0.004625971894711256, "clip_ratio/low_mean": 0.008420133963227272, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013046106323599815, "entropy": 0.0598655603826046, "epoch": 6.52054794520548, "grad_norm": 0.8158863590035045, "kl": 0.8966950178146362, "learning_rate": 3.3715753424657535e-07, "loss": 0.0008, "step": 952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 187.19644165039062, "completions/mean_terminated_length": 187.19644165039062, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.3894321620464325, "epoch": 6.527397260273973, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.8529524295810667, "kl": 0.753280758857727, "learning_rate": 3.36986301369863e-07, "loss": 0.0121, "num_tokens": 11173138.0, "reward": 0.8405018448829651, "reward_std": 0.038325004279613495, "rewards/check_gptzero_func/mean": 0.8405017852783203, "rewards/check_gptzero_func/std": 0.2859131097793579, "sampling/importance_sampling_ratio/max": 1.3438900709152222, "sampling/importance_sampling_ratio/mean": 1.000137448310852, "sampling/importance_sampling_ratio/min": 0.6116675138473511, "sampling/sampling_logp_difference/max": 0.49156641960144043, "sampling/sampling_logp_difference/mean": 0.011878098361194134, "step": 953 }, { "clip_ratio/high_max": 0.017305314540863037, "clip_ratio/high_mean": 0.007110831793397665, "clip_ratio/low_mean": 0.0058428761549293995, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012953707948327065, "entropy": 0.3908567428588867, "epoch": 6.534246575342466, "grad_norm": 1.617691704135305, "kl": 0.7562424540519714, "learning_rate": 3.3681506849315065e-07, "loss": 0.001, "step": 954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2083.0, "completions/max_terminated_length": 2083.0, "completions/mean_length": 259.96429443359375, "completions/mean_terminated_length": 259.96429443359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.4638529419898987, "epoch": 6.541095890410959, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9738166336294338, "kl": 0.7828032374382019, "learning_rate": 3.3664383561643836e-07, "loss": -0.0587, "num_tokens": 11192724.0, "reward": 0.7323248982429504, "reward_std": 0.08084568381309509, "rewards/check_gptzero_func/mean": 0.7323248982429504, "rewards/check_gptzero_func/std": 0.3080506920814514, "sampling/importance_sampling_ratio/max": 1.4296432733535767, "sampling/importance_sampling_ratio/mean": 0.9997813105583191, "sampling/importance_sampling_ratio/min": 0.4653317630290985, "sampling/sampling_logp_difference/max": 0.7650046348571777, "sampling/sampling_logp_difference/mean": 0.014531600289046764, "step": 955 }, { "clip_ratio/high_max": 0.01244813296943903, "clip_ratio/high_mean": 0.006608060095459223, "clip_ratio/low_mean": 0.004639879800379276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011247939430177212, "entropy": 0.4660985469818115, "epoch": 6.5479452054794525, "grad_norm": 5.28523449708252, "kl": 0.7527867555618286, "learning_rate": 3.36472602739726e-07, "loss": -0.0642, "step": 956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1333.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 278.8571472167969, "completions/mean_terminated_length": 278.8571472167969, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.5330759286880493, "epoch": 6.554794520547945, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.5932871855585862, "kl": 0.7389239072799683, "learning_rate": 3.3630136986301366e-07, "loss": 0.0133, "num_tokens": 11213212.0, "reward": 0.7271606922149658, "reward_std": 0.08750264346599579, "rewards/check_gptzero_func/mean": 0.727160632610321, "rewards/check_gptzero_func/std": 0.33669033646583557, "sampling/importance_sampling_ratio/max": 1.4577754735946655, "sampling/importance_sampling_ratio/mean": 0.9997466802597046, "sampling/importance_sampling_ratio/min": 0.6391983032226562, "sampling/sampling_logp_difference/max": 0.4475405216217041, "sampling/sampling_logp_difference/mean": 0.017649522051215172, "step": 957 }, { "clip_ratio/high_max": 0.01131889782845974, "clip_ratio/high_mean": 0.007800854742527008, "clip_ratio/low_mean": 0.005684324074536562, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013485178351402283, "entropy": 0.531236469745636, "epoch": 6.561643835616438, "grad_norm": 1.4396214920454329, "kl": 0.7323268055915833, "learning_rate": 3.361301369863013e-07, "loss": 0.0044, "step": 958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1536.0, "completions/max_terminated_length": 1536.0, "completions/mean_length": 137.83929443359375, "completions/mean_terminated_length": 137.83929443359375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.2514035403728485, "epoch": 6.568493150684931, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.8669823853463274, "kl": 1.0064362287521362, "learning_rate": 3.3595890410958906e-07, "loss": 0.0155, "num_tokens": 11226137.0, "reward": 0.83514803647995, "reward_std": 0.09685864299535751, "rewards/check_gptzero_func/mean": 0.8351479768753052, "rewards/check_gptzero_func/std": 0.26870548725128174, "sampling/importance_sampling_ratio/max": 1.5839565992355347, "sampling/importance_sampling_ratio/mean": 1.0000629425048828, "sampling/importance_sampling_ratio/min": 0.6663914322853088, "sampling/sampling_logp_difference/max": 0.45992588996887207, "sampling/sampling_logp_difference/mean": 0.009461462497711182, "step": 959 }, { "clip_ratio/high_max": 0.01508120633661747, "clip_ratio/high_mean": 0.006736535578966141, "clip_ratio/low_mean": 0.004483071621507406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011219607666134834, "entropy": 0.2509172260761261, "epoch": 6.575342465753424, "grad_norm": 1.5919160764921034, "kl": 1.0051209926605225, "learning_rate": 3.357876712328767e-07, "loss": 0.0057, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 969.0, "completions/max_terminated_length": 969.0, "completions/mean_length": 152.125, "completions/mean_terminated_length": 152.125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.3240790069103241, "epoch": 6.582191780821918, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.9242920887624788, "kl": 0.982433021068573, "learning_rate": 3.3561643835616436e-07, "loss": 0.0965, "num_tokens": 11239502.0, "reward": 0.7777268886566162, "reward_std": 0.1311730146408081, "rewards/check_gptzero_func/mean": 0.7777267694473267, "rewards/check_gptzero_func/std": 0.32508131861686707, "sampling/importance_sampling_ratio/max": 1.4729769229888916, "sampling/importance_sampling_ratio/mean": 0.99932461977005, "sampling/importance_sampling_ratio/min": 0.7354004383087158, "sampling/sampling_logp_difference/max": 0.3872854709625244, "sampling/sampling_logp_difference/mean": 0.011814256198704243, "step": 961 }, { "clip_ratio/high_max": 0.015855953097343445, "clip_ratio/high_mean": 0.006747506093233824, "clip_ratio/low_mean": 0.01057463139295578, "clip_ratio/low_min": 0.0019841270986944437, "clip_ratio/region_mean": 0.017322136089205742, "entropy": 0.3238201141357422, "epoch": 6.589041095890411, "grad_norm": 1.5873436726550376, "kl": 0.909851610660553, "learning_rate": 3.3544520547945207e-07, "loss": 0.0869, "step": 962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1140.0, "completions/mean_length": 315.9464416503906, "completions/mean_terminated_length": 216.5370330810547, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.4111272394657135, "epoch": 6.595890410958904, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.4297001832401686, "kl": 0.6706482172012329, "learning_rate": 3.352739726027397e-07, "loss": 0.013, "num_tokens": 11262687.0, "reward": 0.846051812171936, "reward_std": 0.06414158642292023, "rewards/check_gptzero_func/mean": 0.8460517525672913, "rewards/check_gptzero_func/std": 0.238500714302063, "sampling/importance_sampling_ratio/max": 1.6210869550704956, "sampling/importance_sampling_ratio/mean": 0.9999717473983765, "sampling/importance_sampling_ratio/min": 0.483410507440567, "sampling/sampling_logp_difference/max": 0.7268891334533691, "sampling/sampling_logp_difference/mean": 0.013323954306542873, "step": 963 }, { "clip_ratio/high_max": 0.012909320183098316, "clip_ratio/high_mean": 0.005767893511801958, "clip_ratio/low_mean": 0.004607475362718105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010375367477536201, "entropy": 0.41325536370277405, "epoch": 6.602739726027397, "grad_norm": 1.3970783040199055, "kl": 0.6605973839759827, "learning_rate": 3.3510273972602737e-07, "loss": 0.0061, "step": 964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 106.78572082519531, "completions/mean_terminated_length": 106.78572082519531, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "entropy": 0.2787340581417084, "epoch": 6.609589041095891, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.144528027709888, "kl": 1.0496615171432495, "learning_rate": 3.34931506849315e-07, "loss": 0.0325, "num_tokens": 11273077.0, "reward": 0.7884976267814636, "reward_std": 0.09453681111335754, "rewards/check_gptzero_func/mean": 0.7884975671768188, "rewards/check_gptzero_func/std": 0.3417496383190155, "sampling/importance_sampling_ratio/max": 1.3159703016281128, "sampling/importance_sampling_ratio/mean": 0.9996159672737122, "sampling/importance_sampling_ratio/min": 0.6171554327011108, "sampling/sampling_logp_difference/max": 0.4826343059539795, "sampling/sampling_logp_difference/mean": 0.010598243214190006, "step": 965 }, { "clip_ratio/high_max": 0.0181818176060915, "clip_ratio/high_mean": 0.011202218011021614, "clip_ratio/low_mean": 0.01126114372164011, "clip_ratio/low_min": 0.002109704539179802, "clip_ratio/region_mean": 0.0224633626639843, "entropy": 0.27864742279052734, "epoch": 6.616438356164384, "grad_norm": 1.7741180774279803, "kl": 1.0316630601882935, "learning_rate": 3.347602739726027e-07, "loss": 0.0213, "step": 966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 211.92857360839844, "completions/mean_terminated_length": 211.92857360839844, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.5612041354179382, "epoch": 6.623287671232877, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7843724663324545, "kl": 0.7440025210380554, "learning_rate": 3.345890410958904e-07, "loss": -0.0173, "num_tokens": 11289413.0, "reward": 0.7746714949607849, "reward_std": 0.14464715123176575, "rewards/check_gptzero_func/mean": 0.7746714949607849, "rewards/check_gptzero_func/std": 0.35387784242630005, "sampling/importance_sampling_ratio/max": 1.4216530323028564, "sampling/importance_sampling_ratio/mean": 0.9996579885482788, "sampling/importance_sampling_ratio/min": 0.6298492550849915, "sampling/sampling_logp_difference/max": 0.46227478981018066, "sampling/sampling_logp_difference/mean": 0.01653088629245758, "step": 967 }, { "clip_ratio/high_max": 0.01690458320081234, "clip_ratio/high_mean": 0.007966435514390469, "clip_ratio/low_mean": 0.004908442962914705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012874878011643887, "entropy": 0.5615188479423523, "epoch": 6.63013698630137, "grad_norm": 1.6072982660583433, "kl": 0.7375378012657166, "learning_rate": 3.344178082191781e-07, "loss": -0.0278, "step": 968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1097.0, "completions/max_terminated_length": 1097.0, "completions/mean_length": 203.32144165039062, "completions/mean_terminated_length": 203.32144165039062, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "entropy": 0.525561511516571, "epoch": 6.636986301369863, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.7937069900018459, "kl": 0.7121672630310059, "learning_rate": 3.342465753424658e-07, "loss": 0.0161, "num_tokens": 11305935.0, "reward": 0.8064086437225342, "reward_std": 0.05650532245635986, "rewards/check_gptzero_func/mean": 0.8064085841178894, "rewards/check_gptzero_func/std": 0.32788029313087463, "sampling/importance_sampling_ratio/max": 1.5731968879699707, "sampling/importance_sampling_ratio/mean": 1.0005449056625366, "sampling/importance_sampling_ratio/min": 0.6911508440971375, "sampling/sampling_logp_difference/max": 0.4531097412109375, "sampling/sampling_logp_difference/mean": 0.01680046319961548, "step": 969 }, { "clip_ratio/high_max": 0.013540621846914291, "clip_ratio/high_mean": 0.007859020493924618, "clip_ratio/low_mean": 0.006990585010498762, "clip_ratio/low_min": 0.001186239649541676, "clip_ratio/region_mean": 0.014849605038762093, "entropy": 0.5263198614120483, "epoch": 6.6438356164383565, "grad_norm": 1.5548760933872046, "kl": 0.7101020216941833, "learning_rate": 3.3407534246575343e-07, "loss": 0.0063, "step": 970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 191.7857208251953, "completions/mean_terminated_length": 191.7857208251953, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.37754175066947937, "epoch": 6.6506849315068495, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.7907443139408257, "kl": 0.7811705470085144, "learning_rate": 3.339041095890411e-07, "loss": 0.0361, "num_tokens": 11322009.0, "reward": 0.8470126390457153, "reward_std": 0.08386094868183136, "rewards/check_gptzero_func/mean": 0.8470125794410706, "rewards/check_gptzero_func/std": 0.2743457555770874, "sampling/importance_sampling_ratio/max": 1.4799034595489502, "sampling/importance_sampling_ratio/mean": 0.999972403049469, "sampling/importance_sampling_ratio/min": 0.6883715987205505, "sampling/sampling_logp_difference/max": 0.39197683334350586, "sampling/sampling_logp_difference/mean": 0.012261846102774143, "step": 971 }, { "clip_ratio/high_max": 0.01464808825403452, "clip_ratio/high_mean": 0.008614635095000267, "clip_ratio/low_mean": 0.00827480386942625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.016889439895749092, "entropy": 0.37771058082580566, "epoch": 6.657534246575342, "grad_norm": 1.6576964779219343, "kl": 0.7828019857406616, "learning_rate": 3.3373287671232873e-07, "loss": 0.0271, "step": 972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 154.8928680419922, "completions/mean_terminated_length": 154.8928680419922, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.3694452941417694, "epoch": 6.664383561643835, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.9466344778428424, "kl": 1.014339566230774, "learning_rate": 3.3356164383561643e-07, "loss": -0.0249, "num_tokens": 11335713.0, "reward": 0.8079875707626343, "reward_std": 0.08356232941150665, "rewards/check_gptzero_func/mean": 0.8079875707626343, "rewards/check_gptzero_func/std": 0.3054834008216858, "sampling/importance_sampling_ratio/max": 1.3333762884140015, "sampling/importance_sampling_ratio/mean": 0.9997719526290894, "sampling/importance_sampling_ratio/min": 0.6249149441719055, "sampling/sampling_logp_difference/max": 0.470139741897583, "sampling/sampling_logp_difference/mean": 0.01283750869333744, "step": 973 }, { "clip_ratio/high_max": 0.021215597167611122, "clip_ratio/high_mean": 0.009771010838449001, "clip_ratio/low_mean": 0.006818183697760105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.016589192673563957, "entropy": 0.3714495599269867, "epoch": 6.671232876712329, "grad_norm": 1.5877290749902229, "kl": 0.995790958404541, "learning_rate": 3.333904109589041e-07, "loss": -0.035, "step": 974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 173.69644165039062, "completions/mean_terminated_length": 173.69644165039062, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.3813750445842743, "epoch": 6.678082191780822, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9151117865293337, "kl": 0.9406258463859558, "learning_rate": 3.3321917808219173e-07, "loss": -0.0263, "num_tokens": 11350258.0, "reward": 0.8023911118507385, "reward_std": 0.08078828454017639, "rewards/check_gptzero_func/mean": 0.8023910522460938, "rewards/check_gptzero_func/std": 0.31211066246032715, "sampling/importance_sampling_ratio/max": 1.4307982921600342, "sampling/importance_sampling_ratio/mean": 0.9996694326400757, "sampling/importance_sampling_ratio/min": 0.6485326886177063, "sampling/sampling_logp_difference/max": 0.43304288387298584, "sampling/sampling_logp_difference/mean": 0.013382482342422009, "step": 975 }, { "clip_ratio/high_max": 0.027060270309448242, "clip_ratio/high_mean": 0.011040468700230122, "clip_ratio/low_mean": 0.005882262717932463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.016922730952501297, "entropy": 0.3805972933769226, "epoch": 6.684931506849315, "grad_norm": 1.5913406048766692, "kl": 0.9333099722862244, "learning_rate": 3.3304794520547944e-07, "loss": -0.0364, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1005.0, "completions/max_terminated_length": 1005.0, "completions/mean_length": 135.10714721679688, "completions/mean_terminated_length": 135.10714721679688, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.24230453372001648, "epoch": 6.691780821917808, "frac_reward_zero_std": 0.3214285969734192, "grad_norm": 2.12128763938734, "kl": 0.8534108996391296, "learning_rate": 3.3287671232876714e-07, "loss": -0.0352, "num_tokens": 11362418.0, "reward": 0.8225613236427307, "reward_std": 0.04248669371008873, "rewards/check_gptzero_func/mean": 0.8225612640380859, "rewards/check_gptzero_func/std": 0.3032451272010803, "sampling/importance_sampling_ratio/max": 1.4052797555923462, "sampling/importance_sampling_ratio/mean": 0.9998078942298889, "sampling/importance_sampling_ratio/min": 0.6715859770774841, "sampling/sampling_logp_difference/max": 0.3981132507324219, "sampling/sampling_logp_difference/mean": 0.007883410900831223, "step": 977 }, { "clip_ratio/high_max": 0.022921845316886902, "clip_ratio/high_mean": 0.007017848547548056, "clip_ratio/low_mean": 0.004049830604344606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011067680083215237, "entropy": 0.24342286586761475, "epoch": 6.698630136986301, "grad_norm": 1.6827355657132934, "kl": 0.8509465456008911, "learning_rate": 3.327054794520548e-07, "loss": -0.0454, "step": 978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 164.0178680419922, "completions/mean_terminated_length": 164.0178680419922, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.36734405159950256, "epoch": 6.705479452054795, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 2.343649829173579, "kl": 0.7540148496627808, "learning_rate": 3.3253424657534244e-07, "loss": 0.0282, "num_tokens": 11376531.0, "reward": 0.8230324983596802, "reward_std": 0.04911067709326744, "rewards/check_gptzero_func/mean": 0.8230324387550354, "rewards/check_gptzero_func/std": 0.3059375584125519, "sampling/importance_sampling_ratio/max": 1.4808604717254639, "sampling/importance_sampling_ratio/mean": 0.9999021887779236, "sampling/importance_sampling_ratio/min": 0.6935681700706482, "sampling/sampling_logp_difference/max": 0.39262330532073975, "sampling/sampling_logp_difference/mean": 0.012697961181402206, "step": 979 }, { "clip_ratio/high_max": 0.017844397574663162, "clip_ratio/high_mean": 0.006598223000764847, "clip_ratio/low_mean": 0.007484477013349533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01408270001411438, "entropy": 0.36813250184059143, "epoch": 6.712328767123288, "grad_norm": 1.606292155333358, "kl": 0.7556822896003723, "learning_rate": 3.3236301369863014e-07, "loss": 0.0192, "step": 980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1082.0, "completions/max_terminated_length": 1082.0, "completions/mean_length": 124.0714340209961, "completions/mean_terminated_length": 124.0714340209961, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.30990245938301086, "epoch": 6.719178082191781, "frac_reward_zero_std": 0.25, "grad_norm": 1.9766655403855304, "kl": 0.7858802676200867, "learning_rate": 3.321917808219178e-07, "loss": 0.0128, "num_tokens": 11388559.0, "reward": 0.8539736866950989, "reward_std": 0.05770857259631157, "rewards/check_gptzero_func/mean": 0.8539736866950989, "rewards/check_gptzero_func/std": 0.27127954363822937, "sampling/importance_sampling_ratio/max": 1.4413087368011475, "sampling/importance_sampling_ratio/mean": 1.000356674194336, "sampling/importance_sampling_ratio/min": 0.7150513529777527, "sampling/sampling_logp_difference/max": 0.3655514717102051, "sampling/sampling_logp_difference/mean": 0.009889181703329086, "step": 981 }, { "clip_ratio/high_max": 0.01981767825782299, "clip_ratio/high_mean": 0.007063596043735743, "clip_ratio/low_mean": 0.0052571892738342285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012320784851908684, "entropy": 0.31011733412742615, "epoch": 6.726027397260274, "grad_norm": 1.678622020125104, "kl": 0.784532904624939, "learning_rate": 3.3202054794520545e-07, "loss": 0.0026, "step": 982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1220.0, "completions/max_terminated_length": 1220.0, "completions/mean_length": 269.14288330078125, "completions/mean_terminated_length": 269.14288330078125, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.5003277063369751, "epoch": 6.732876712328768, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 1.5980825965328114, "kl": 0.6836075186729431, "learning_rate": 3.318493150684931e-07, "loss": -0.0123, "num_tokens": 11408985.0, "reward": 0.7575916647911072, "reward_std": 0.08939244598150253, "rewards/check_gptzero_func/mean": 0.7575916647911072, "rewards/check_gptzero_func/std": 0.3560483455657959, "sampling/importance_sampling_ratio/max": 1.6237695217132568, "sampling/importance_sampling_ratio/mean": 1.0000889301300049, "sampling/importance_sampling_ratio/min": 0.6729905605316162, "sampling/sampling_logp_difference/max": 0.48475027084350586, "sampling/sampling_logp_difference/mean": 0.016126329079270363, "step": 983 }, { "clip_ratio/high_max": 0.012848914600908756, "clip_ratio/high_mean": 0.007275646086782217, "clip_ratio/low_mean": 0.005413442384451628, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012689088471233845, "entropy": 0.5006116628646851, "epoch": 6.739726027397261, "grad_norm": 1.389694090036049, "kl": 0.6729931235313416, "learning_rate": 3.316780821917808e-07, "loss": -0.0212, "step": 984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1812.0, "completions/max_terminated_length": 1812.0, "completions/mean_length": 174.35714721679688, "completions/mean_terminated_length": 174.35714721679688, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 0.4530147612094879, "epoch": 6.7465753424657535, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.0172392746303847, "kl": 0.8554176092147827, "learning_rate": 3.315068493150685e-07, "loss": 0.0667, "num_tokens": 11423809.0, "reward": 0.8048385977745056, "reward_std": 0.10221386700868607, "rewards/check_gptzero_func/mean": 0.8048386573791504, "rewards/check_gptzero_func/std": 0.3024771809577942, "sampling/importance_sampling_ratio/max": 1.6673232316970825, "sampling/importance_sampling_ratio/mean": 0.9995903372764587, "sampling/importance_sampling_ratio/min": 0.39470574259757996, "sampling/sampling_logp_difference/max": 0.929614782333374, "sampling/sampling_logp_difference/mean": 0.012573566287755966, "step": 985 }, { "clip_ratio/high_max": 0.016480447724461555, "clip_ratio/high_mean": 0.007360031362622976, "clip_ratio/low_mean": 0.006229279097169638, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013589310459792614, "entropy": 0.4533076286315918, "epoch": 6.7534246575342465, "grad_norm": 1.6785988835194963, "kl": 0.8541144132614136, "learning_rate": 3.3133561643835615e-07, "loss": 0.0565, "step": 986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1177.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 109.21428680419922, "completions/mean_terminated_length": 109.21428680419922, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.28958892822265625, "epoch": 6.760273972602739, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.1789559114235995, "kl": 1.0695854425430298, "learning_rate": 3.3116438356164386e-07, "loss": 0.0045, "num_tokens": 11434423.0, "reward": 0.8712292313575745, "reward_std": 0.043967071920633316, "rewards/check_gptzero_func/mean": 0.8712291717529297, "rewards/check_gptzero_func/std": 0.2488313764333725, "sampling/importance_sampling_ratio/max": 1.3856396675109863, "sampling/importance_sampling_ratio/mean": 0.999668300151825, "sampling/importance_sampling_ratio/min": 0.7105199694633484, "sampling/sampling_logp_difference/max": 0.34175825119018555, "sampling/sampling_logp_difference/mean": 0.009721715934574604, "step": 987 }, { "clip_ratio/high_max": 0.02319587580859661, "clip_ratio/high_mean": 0.011944472789764404, "clip_ratio/low_mean": 0.008354301564395428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.020298775285482407, "entropy": 0.290133535861969, "epoch": 6.767123287671232, "grad_norm": 1.7476748020017119, "kl": 1.0549875497817993, "learning_rate": 3.309931506849315e-07, "loss": -0.0065, "step": 988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 771.0, "completions/max_terminated_length": 771.0, "completions/mean_length": 113.75000762939453, "completions/mean_terminated_length": 113.75000762939453, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 0.3869740962982178, "epoch": 6.773972602739726, "frac_reward_zero_std": 0.25, "grad_norm": 2.0583941329146396, "kl": 0.8456348776817322, "learning_rate": 3.3082191780821916e-07, "loss": 0.0781, "num_tokens": 11445981.0, "reward": 0.7375643849372864, "reward_std": 0.06702075153589249, "rewards/check_gptzero_func/mean": 0.7375643849372864, "rewards/check_gptzero_func/std": 0.3882542848587036, "sampling/importance_sampling_ratio/max": 1.3172215223312378, "sampling/importance_sampling_ratio/mean": 0.9999231696128845, "sampling/importance_sampling_ratio/min": 0.7580838203430176, "sampling/sampling_logp_difference/max": 0.2769613265991211, "sampling/sampling_logp_difference/mean": 0.012401547282934189, "step": 989 }, { "clip_ratio/high_max": 0.022424666211009026, "clip_ratio/high_mean": 0.012130067683756351, "clip_ratio/low_mean": 0.0122296791523695, "clip_ratio/low_min": 0.001550387591123581, "clip_ratio/region_mean": 0.024359745904803276, "entropy": 0.3891127407550812, "epoch": 6.780821917808219, "grad_norm": 1.6704991317948668, "kl": 0.8372093439102173, "learning_rate": 3.306506849315068e-07, "loss": 0.0676, "step": 990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/max_terminated_length": 177.0, "completions/mean_length": 62.33928680419922, "completions/mean_terminated_length": 62.33928680419922, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.1536727249622345, "epoch": 6.787671232876712, "frac_reward_zero_std": 0.25, "grad_norm": 1.7025502174253695, "kl": 1.0464352369308472, "learning_rate": 3.304794520547945e-07, "loss": 0.0291, "num_tokens": 11454252.0, "reward": 0.9023252129554749, "reward_std": 0.031444843858480453, "rewards/check_gptzero_func/mean": 0.9023252129554749, "rewards/check_gptzero_func/std": 0.21166619658470154, "sampling/importance_sampling_ratio/max": 1.243861198425293, "sampling/importance_sampling_ratio/mean": 0.9988517761230469, "sampling/importance_sampling_ratio/min": 0.3883288502693176, "sampling/sampling_logp_difference/max": 0.9459028244018555, "sampling/sampling_logp_difference/mean": 0.005234023556113243, "step": 991 }, { "clip_ratio/high_max": 0.02331606298685074, "clip_ratio/high_mean": 0.005830562673509121, "clip_ratio/low_mean": 0.013972708955407143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.019803272560238838, "entropy": 0.15131725370883942, "epoch": 6.794520547945205, "grad_norm": 1.068471671969981, "kl": 1.0534780025482178, "learning_rate": 3.3030821917808216e-07, "loss": 0.0231, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1152.0, "completions/max_terminated_length": 1152.0, "completions/mean_length": 138.58929443359375, "completions/mean_terminated_length": 138.58929443359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.3207233250141144, "epoch": 6.801369863013699, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.060573935070497, "kl": 1.0710108280181885, "learning_rate": 3.301369863013698e-07, "loss": 0.0276, "num_tokens": 11466827.0, "reward": 0.8049690127372742, "reward_std": 0.03761245310306549, "rewards/check_gptzero_func/mean": 0.8049689531326294, "rewards/check_gptzero_func/std": 0.34998172521591187, "sampling/importance_sampling_ratio/max": 1.3846185207366943, "sampling/importance_sampling_ratio/mean": 1.0001627206802368, "sampling/importance_sampling_ratio/min": 0.6874688863754272, "sampling/sampling_logp_difference/max": 0.3747386932373047, "sampling/sampling_logp_difference/mean": 0.011028822511434555, "step": 993 }, { "clip_ratio/high_max": 0.02560124173760414, "clip_ratio/high_mean": 0.009747887961566448, "clip_ratio/low_mean": 0.005705575924366713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.015453465282917023, "entropy": 0.3231731057167053, "epoch": 6.808219178082192, "grad_norm": 1.6113853069608326, "kl": 1.0091804265975952, "learning_rate": 3.2996575342465757e-07, "loss": 0.0169, "step": 994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1223.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 140.32144165039062, "completions/mean_terminated_length": 140.32144165039062, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 0.319137841463089, "epoch": 6.815068493150685, "frac_reward_zero_std": 0.3214285969734192, "grad_norm": 1.7889108011171317, "kl": 0.7340723872184753, "learning_rate": 3.297945205479452e-07, "loss": -0.0513, "num_tokens": 11480113.0, "reward": 0.7836477160453796, "reward_std": 0.05114322155714035, "rewards/check_gptzero_func/mean": 0.7836476564407349, "rewards/check_gptzero_func/std": 0.3132423162460327, "sampling/importance_sampling_ratio/max": 1.5673249959945679, "sampling/importance_sampling_ratio/mean": 1.000186800956726, "sampling/importance_sampling_ratio/min": 0.7232039570808411, "sampling/sampling_logp_difference/max": 0.4493703842163086, "sampling/sampling_logp_difference/mean": 0.010956763289868832, "step": 995 }, { "clip_ratio/high_max": 0.019728729501366615, "clip_ratio/high_mean": 0.010859578847885132, "clip_ratio/low_mean": 0.007433141116052866, "clip_ratio/low_min": 0.0014430014416575432, "clip_ratio/region_mean": 0.01829272136092186, "entropy": 0.3196876347064972, "epoch": 6.821917808219178, "grad_norm": 1.485748134957696, "kl": 0.7311388850212097, "learning_rate": 3.2962328767123287e-07, "loss": -0.061, "step": 996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 595.0, "completions/max_terminated_length": 595.0, "completions/mean_length": 82.89286041259766, "completions/mean_terminated_length": 82.89286041259766, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.2463623732328415, "epoch": 6.828767123287671, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 2.0008688513621276, "kl": 1.13613760471344, "learning_rate": 3.294520547945205e-07, "loss": 0.0411, "num_tokens": 11489801.0, "reward": 0.8693910241127014, "reward_std": 0.07717953622341156, "rewards/check_gptzero_func/mean": 0.8693909645080566, "rewards/check_gptzero_func/std": 0.2900925278663635, "sampling/importance_sampling_ratio/max": 1.2372212409973145, "sampling/importance_sampling_ratio/mean": 0.9998201727867126, "sampling/importance_sampling_ratio/min": 0.7704857587814331, "sampling/sampling_logp_difference/max": 0.26073408126831055, "sampling/sampling_logp_difference/mean": 0.0080596087500453, "step": 997 }, { "clip_ratio/high_max": 0.031111111864447594, "clip_ratio/high_mean": 0.006397956050932407, "clip_ratio/low_mean": 0.010410969145596027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.016808925196528435, "entropy": 0.2462010681629181, "epoch": 6.835616438356165, "grad_norm": 1.52271654231521, "kl": 1.1302868127822876, "learning_rate": 3.292808219178082e-07, "loss": 0.032, "step": 998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 191.44644165039062, "completions/mean_terminated_length": 191.44644165039062, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.15991219878196716, "epoch": 6.842465753424658, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.5610102296771071, "kl": 0.8953300714492798, "learning_rate": 3.2910958904109587e-07, "loss": 0.0211, "num_tokens": 11505794.0, "reward": 0.9129741191864014, "reward_std": 0.04257187992334366, "rewards/check_gptzero_func/mean": 0.9129740595817566, "rewards/check_gptzero_func/std": 0.20171868801116943, "sampling/importance_sampling_ratio/max": 1.5453262329101562, "sampling/importance_sampling_ratio/mean": 0.9997420907020569, "sampling/importance_sampling_ratio/min": 0.4159659743309021, "sampling/sampling_logp_difference/max": 0.8771518468856812, "sampling/sampling_logp_difference/mean": 0.0055845207534730434, "step": 999 }, { "clip_ratio/high_max": 0.01448256429284811, "clip_ratio/high_mean": 0.002757469890639186, "clip_ratio/low_mean": 0.003055048640817404, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005812518298625946, "entropy": 0.1595848947763443, "epoch": 6.8493150684931505, "grad_norm": 1.3004738225545465, "kl": 0.8942005038261414, "learning_rate": 3.289383561643835e-07, "loss": 0.0128, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1164.0, "completions/max_terminated_length": 1164.0, "completions/mean_length": 152.60714721679688, "completions/mean_terminated_length": 152.60714721679688, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.35873860120773315, "epoch": 6.8561643835616435, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9843003214351216, "kl": 0.8028587698936462, "learning_rate": 3.287671232876712e-07, "loss": 0.0081, "num_tokens": 11519086.0, "reward": 0.7882413268089294, "reward_std": 0.058241140097379684, "rewards/check_gptzero_func/mean": 0.7882413268089294, "rewards/check_gptzero_func/std": 0.3160807490348816, "sampling/importance_sampling_ratio/max": 1.6469541788101196, "sampling/importance_sampling_ratio/mean": 0.9994303584098816, "sampling/importance_sampling_ratio/min": 0.5314502120018005, "sampling/sampling_logp_difference/max": 0.632145881652832, "sampling/sampling_logp_difference/mean": 0.012452484108507633, "step": 1001 }, { "clip_ratio/high_max": 0.045627377927303314, "clip_ratio/high_mean": 0.009781037457287312, "clip_ratio/low_mean": 0.007556046824902296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01733708195388317, "entropy": 0.358413428068161, "epoch": 6.863013698630137, "grad_norm": 1.6984658400123323, "kl": 0.8072037100791931, "learning_rate": 3.285958904109589e-07, "loss": -0.0014, "step": 1002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 140.08929443359375, "completions/mean_terminated_length": 140.08929443359375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.29419344663619995, "epoch": 6.86986301369863, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.0774078517191126, "kl": 0.9968339204788208, "learning_rate": 3.284246575342466e-07, "loss": 0.0389, "num_tokens": 11531581.0, "reward": 0.7900047898292542, "reward_std": 0.06058526784181595, "rewards/check_gptzero_func/mean": 0.7900047302246094, "rewards/check_gptzero_func/std": 0.34325870871543884, "sampling/importance_sampling_ratio/max": 1.5339714288711548, "sampling/importance_sampling_ratio/mean": 1.0004034042358398, "sampling/importance_sampling_ratio/min": 0.7210165858268738, "sampling/sampling_logp_difference/max": 0.4278600215911865, "sampling/sampling_logp_difference/mean": 0.009185637347400188, "step": 1003 }, { "clip_ratio/high_max": 0.022286605089902878, "clip_ratio/high_mean": 0.005534926895052195, "clip_ratio/low_mean": 0.0075079090893268585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01304283645004034, "entropy": 0.29482290148735046, "epoch": 6.876712328767123, "grad_norm": 1.715895722284741, "kl": 0.996972382068634, "learning_rate": 3.2825342465753423e-07, "loss": 0.0281, "step": 1004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 150.0178680419922, "completions/mean_terminated_length": 150.0178680419922, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.21507832407951355, "epoch": 6.883561643835616, "frac_reward_zero_std": 0.392857164144516, "grad_norm": 1.7057926788510056, "kl": 0.6446422934532166, "learning_rate": 3.2808219178082193e-07, "loss": 0.0124, "num_tokens": 11545682.0, "reward": 0.8134267330169678, "reward_std": 0.02204255945980549, "rewards/check_gptzero_func/mean": 0.813426673412323, "rewards/check_gptzero_func/std": 0.34179946780204773, "sampling/importance_sampling_ratio/max": 1.26613450050354, "sampling/importance_sampling_ratio/mean": 0.9998787641525269, "sampling/importance_sampling_ratio/min": 0.7282107472419739, "sampling/sampling_logp_difference/max": 0.3171647787094116, "sampling/sampling_logp_difference/mean": 0.006409636698663235, "step": 1005 }, { "clip_ratio/high_max": 0.021836673840880394, "clip_ratio/high_mean": 0.004996527452021837, "clip_ratio/low_mean": 0.0035157687962055206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008512296713888645, "entropy": 0.215297669172287, "epoch": 6.890410958904109, "grad_norm": 1.4279592557857343, "kl": 0.646992027759552, "learning_rate": 3.279109589041096e-07, "loss": 0.0036, "step": 1006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 185.33929443359375, "completions/mean_terminated_length": 185.33929443359375, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.35390517115592957, "epoch": 6.897260273972603, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7814668464616363, "kl": 0.8692004084587097, "learning_rate": 3.2773972602739723e-07, "loss": -0.0185, "num_tokens": 11560869.0, "reward": 0.8287277817726135, "reward_std": 0.07731964439153671, "rewards/check_gptzero_func/mean": 0.8287277221679688, "rewards/check_gptzero_func/std": 0.29364126920700073, "sampling/importance_sampling_ratio/max": 1.419948697090149, "sampling/importance_sampling_ratio/mean": 1.0004767179489136, "sampling/importance_sampling_ratio/min": 0.6407833695411682, "sampling/sampling_logp_difference/max": 0.44506382942199707, "sampling/sampling_logp_difference/mean": 0.012271360494196415, "step": 1007 }, { "clip_ratio/high_max": 0.010447761043906212, "clip_ratio/high_mean": 0.006260292138904333, "clip_ratio/low_mean": 0.005257438402622938, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011517731472849846, "entropy": 0.3536512851715088, "epoch": 6.904109589041096, "grad_norm": 1.5734924099378007, "kl": 0.8670295476913452, "learning_rate": 3.275684931506849e-07, "loss": -0.0276, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1260.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 206.6428680419922, "completions/mean_terminated_length": 206.6428680419922, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5447494387626648, "epoch": 6.910958904109589, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.0613026130308247, "kl": 0.8680709600448608, "learning_rate": 3.273972602739726e-07, "loss": 0.0169, "num_tokens": 11577077.0, "reward": 0.7296488881111145, "reward_std": 0.11544772982597351, "rewards/check_gptzero_func/mean": 0.7296488881111145, "rewards/check_gptzero_func/std": 0.3914875388145447, "sampling/importance_sampling_ratio/max": 1.3867931365966797, "sampling/importance_sampling_ratio/mean": 0.9998430609703064, "sampling/importance_sampling_ratio/min": 0.6962363719940186, "sampling/sampling_logp_difference/max": 0.36206603050231934, "sampling/sampling_logp_difference/mean": 0.0174064002931118, "step": 1009 }, { "clip_ratio/high_max": 0.016780609264969826, "clip_ratio/high_mean": 0.009299246594309807, "clip_ratio/low_mean": 0.007217296399176121, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.016516542062163353, "entropy": 0.5449120402336121, "epoch": 6.917808219178082, "grad_norm": 1.6315896946658273, "kl": 0.8274958729743958, "learning_rate": 3.2722602739726024e-07, "loss": 0.0073, "step": 1010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 127.8214340209961, "completions/mean_terminated_length": 127.8214340209961, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.207599475979805, "epoch": 6.924657534246576, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 5.968342971511461, "kl": 1.086236834526062, "learning_rate": 3.270547945205479e-07, "loss": 0.032, "num_tokens": 11589509.0, "reward": 0.8843947052955627, "reward_std": 0.046678539365530014, "rewards/check_gptzero_func/mean": 0.884394645690918, "rewards/check_gptzero_func/std": 0.26787492632865906, "sampling/importance_sampling_ratio/max": 1.4394207000732422, "sampling/importance_sampling_ratio/mean": 0.9998716711997986, "sampling/importance_sampling_ratio/min": 0.6151273846626282, "sampling/sampling_logp_difference/max": 0.48592591285705566, "sampling/sampling_logp_difference/mean": 0.0067448182962834835, "step": 1011 }, { "clip_ratio/high_max": 0.012131716124713421, "clip_ratio/high_mean": 0.0028647349681705236, "clip_ratio/low_mean": 0.002540930639952421, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005405665375292301, "entropy": 0.20804421603679657, "epoch": 6.931506849315069, "grad_norm": 1.839144977297845, "kl": 1.0861955881118774, "learning_rate": 3.2688356164383565e-07, "loss": 0.025, "step": 1012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/max_terminated_length": 124.0, "completions/mean_length": 62.46428680419922, "completions/mean_terminated_length": 62.46428680419922, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.07171334326267242, "epoch": 6.938356164383562, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 0.835670214388494, "kl": 1.0573835372924805, "learning_rate": 3.267123287671233e-07, "loss": -0.0068, "num_tokens": 11597767.0, "reward": 0.9287348389625549, "reward_std": 0.06914239376783371, "rewards/check_gptzero_func/mean": 0.9287347793579102, "rewards/check_gptzero_func/std": 0.20545175671577454, "sampling/importance_sampling_ratio/max": 1.2759231328964233, "sampling/importance_sampling_ratio/mean": 1.0004609823226929, "sampling/importance_sampling_ratio/min": 0.8462569117546082, "sampling/sampling_logp_difference/max": 0.24366998672485352, "sampling/sampling_logp_difference/mean": 0.002371947281062603, "step": 1013 }, { "clip_ratio/high_max": 0.03611738234758377, "clip_ratio/high_mean": 0.0070318421348929405, "clip_ratio/low_mean": 0.005707554519176483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012739396654069424, "entropy": 0.0724063515663147, "epoch": 6.945205479452055, "grad_norm": 0.5195403461036142, "kl": 1.0523167848587036, "learning_rate": 3.2654109589041095e-07, "loss": -0.0096, "step": 1014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 249.46429443359375, "completions/mean_terminated_length": 249.46429443359375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.39189842343330383, "epoch": 6.9520547945205475, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 2.617327332516842, "kl": 1.0155558586120605, "learning_rate": 3.263698630136986e-07, "loss": -0.0082, "num_tokens": 11616675.0, "reward": 0.798653244972229, "reward_std": 0.05101495236158371, "rewards/check_gptzero_func/mean": 0.7986531853675842, "rewards/check_gptzero_func/std": 0.3175486624240875, "sampling/importance_sampling_ratio/max": 1.4240747690200806, "sampling/importance_sampling_ratio/mean": 1.0004874467849731, "sampling/importance_sampling_ratio/min": 0.7024645209312439, "sampling/sampling_logp_difference/max": 0.35352230072021484, "sampling/sampling_logp_difference/mean": 0.012328589335083961, "step": 1015 }, { "clip_ratio/high_max": 0.008433200418949127, "clip_ratio/high_mean": 0.00577458506450057, "clip_ratio/low_mean": 0.002497653244063258, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00827223900705576, "entropy": 0.39433401823043823, "epoch": 6.958904109589041, "grad_norm": 1.4488979157076143, "kl": 0.6756958365440369, "learning_rate": 3.261986301369863e-07, "loss": -0.0171, "step": 1016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1161.0, "completions/max_terminated_length": 1161.0, "completions/mean_length": 171.08929443359375, "completions/mean_terminated_length": 171.08929443359375, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.36356085538864136, "epoch": 6.965753424657534, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.7365290851125195, "kl": 0.7391883134841919, "learning_rate": 3.2602739726027395e-07, "loss": -0.0096, "num_tokens": 11631062.0, "reward": 0.803486704826355, "reward_std": 0.09294906258583069, "rewards/check_gptzero_func/mean": 0.803486704826355, "rewards/check_gptzero_func/std": 0.35796114802360535, "sampling/importance_sampling_ratio/max": 1.4352084398269653, "sampling/importance_sampling_ratio/mean": 1.0003762245178223, "sampling/importance_sampling_ratio/min": 0.6973575949668884, "sampling/sampling_logp_difference/max": 0.3613100051879883, "sampling/sampling_logp_difference/mean": 0.012066553346812725, "step": 1017 }, { "clip_ratio/high_max": 0.01974865421652794, "clip_ratio/high_mean": 0.008413241244852543, "clip_ratio/low_mean": 0.006614007521420717, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.015027248300611973, "entropy": 0.3631785809993744, "epoch": 6.972602739726027, "grad_norm": 1.4544933240199767, "kl": 0.7226127982139587, "learning_rate": 3.258561643835616e-07, "loss": -0.0192, "step": 1018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 141.83929443359375, "completions/mean_terminated_length": 141.83929443359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.24656115472316742, "epoch": 6.97945205479452, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7465141435352494, "kl": 0.910511314868927, "learning_rate": 3.256849315068493e-07, "loss": -0.0055, "num_tokens": 11644379.0, "reward": 0.8909950852394104, "reward_std": 0.06748407334089279, "rewards/check_gptzero_func/mean": 0.8909950256347656, "rewards/check_gptzero_func/std": 0.2719295620918274, "sampling/importance_sampling_ratio/max": 1.3970898389816284, "sampling/importance_sampling_ratio/mean": 0.999755859375, "sampling/importance_sampling_ratio/min": 0.6672938466072083, "sampling/sampling_logp_difference/max": 0.4045248031616211, "sampling/sampling_logp_difference/mean": 0.008340083062648773, "step": 1019 }, { "clip_ratio/high_max": 0.01916932873427868, "clip_ratio/high_mean": 0.0058990721590816975, "clip_ratio/low_mean": 0.0049624149687588215, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010861487127840519, "entropy": 0.24636192619800568, "epoch": 6.986301369863014, "grad_norm": 1.436174720217551, "kl": 0.9073442816734314, "learning_rate": 3.2551369863013695e-07, "loss": -0.0143, "step": 1020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 202.57144165039062, "completions/mean_terminated_length": 202.57144165039062, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.34911197423934937, "epoch": 6.993150684931507, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7514872168015339, "kl": 0.9704928994178772, "learning_rate": 3.2534246575342466e-07, "loss": -0.0172, "num_tokens": 11660241.0, "reward": 0.8646639585494995, "reward_std": 0.03656037524342537, "rewards/check_gptzero_func/mean": 0.8646640181541443, "rewards/check_gptzero_func/std": 0.24246059358119965, "sampling/importance_sampling_ratio/max": 1.3003398180007935, "sampling/importance_sampling_ratio/mean": 0.9998878836631775, "sampling/importance_sampling_ratio/min": 0.6952359676361084, "sampling/sampling_logp_difference/max": 0.36350393295288086, "sampling/sampling_logp_difference/mean": 0.011422916315495968, "step": 1021 }, { "clip_ratio/high_max": 0.01154068112373352, "clip_ratio/high_mean": 0.0053936270996928215, "clip_ratio/low_mean": 0.0037474751006811857, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009141102433204651, "entropy": 0.3488759398460388, "epoch": 7.0, "grad_norm": 1.5552034256264935, "kl": 0.9692155122756958, "learning_rate": 3.251712328767123e-07, "loss": -0.0269, "step": 1022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1185.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 132.58929443359375, "completions/mean_terminated_length": 132.58929443359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.24286532402038574, "epoch": 7.006849315068493, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 1.8025097755181887, "kl": 0.7034915685653687, "learning_rate": 3.25e-07, "loss": 0.003, "num_tokens": 11672314.0, "reward": 0.8447930216789246, "reward_std": 0.056507039815187454, "rewards/check_gptzero_func/mean": 0.8447929620742798, "rewards/check_gptzero_func/std": 0.30586767196655273, "sampling/importance_sampling_ratio/max": 1.516050934791565, "sampling/importance_sampling_ratio/mean": 0.9998716711997986, "sampling/importance_sampling_ratio/min": 0.6977732181549072, "sampling/sampling_logp_difference/max": 0.4161089062690735, "sampling/sampling_logp_difference/mean": 0.007886177860200405, "step": 1023 }, { "clip_ratio/high_max": 0.01640881411731243, "clip_ratio/high_mean": 0.006468861363828182, "clip_ratio/low_mean": 0.005633519496768713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012102380394935608, "entropy": 0.2408507615327835, "epoch": 7.013698630136986, "grad_norm": 1.5265792426137748, "kl": 0.7072630524635315, "learning_rate": 3.2482876712328766e-07, "loss": -0.0069, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1589.0, "completions/max_terminated_length": 1589.0, "completions/mean_length": 246.6607208251953, "completions/mean_terminated_length": 246.6607208251953, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.3617036044597626, "epoch": 7.02054794520548, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.6668507827176282, "kl": 0.8343428373336792, "learning_rate": 3.246575342465753e-07, "loss": 0.0429, "num_tokens": 11690825.0, "reward": 0.8515670299530029, "reward_std": 0.043758608400821686, "rewards/check_gptzero_func/mean": 0.8515670895576477, "rewards/check_gptzero_func/std": 0.2708885073661804, "sampling/importance_sampling_ratio/max": 1.4020246267318726, "sampling/importance_sampling_ratio/mean": 0.9998119473457336, "sampling/importance_sampling_ratio/min": 0.692349910736084, "sampling/sampling_logp_difference/max": 0.3676638603210449, "sampling/sampling_logp_difference/mean": 0.01085105910897255, "step": 1025 }, { "clip_ratio/high_max": 0.012665685266256332, "clip_ratio/high_mean": 0.00470791244879365, "clip_ratio/low_mean": 0.003953203093260527, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008661115542054176, "entropy": 0.3617050349712372, "epoch": 7.027397260273973, "grad_norm": 1.4669545010326894, "kl": 0.8367940187454224, "learning_rate": 3.24486301369863e-07, "loss": 0.0337, "step": 1026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 179.10714721679688, "completions/mean_terminated_length": 179.10714721679688, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.4008658230304718, "epoch": 7.034246575342466, "frac_reward_zero_std": 0.25, "grad_norm": 1.922634952784999, "kl": 0.8073442578315735, "learning_rate": 3.2431506849315067e-07, "loss": -0.0404, "num_tokens": 11705477.0, "reward": 0.8155398964881897, "reward_std": 0.07555156946182251, "rewards/check_gptzero_func/mean": 0.8155398368835449, "rewards/check_gptzero_func/std": 0.3113943040370941, "sampling/importance_sampling_ratio/max": 1.5297715663909912, "sampling/importance_sampling_ratio/mean": 1.0001505613327026, "sampling/importance_sampling_ratio/min": 0.615634024143219, "sampling/sampling_logp_difference/max": 0.48510265350341797, "sampling/sampling_logp_difference/mean": 0.012386215850710869, "step": 1027 }, { "clip_ratio/high_max": 0.01918976567685604, "clip_ratio/high_mean": 0.009780154563486576, "clip_ratio/low_mean": 0.005383952520787716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.015164107084274292, "entropy": 0.4043613076210022, "epoch": 7.041095890410959, "grad_norm": 1.638868469418571, "kl": 0.7409862279891968, "learning_rate": 3.241438356164383e-07, "loss": -0.051, "step": 1028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1061.0, "completions/max_terminated_length": 1061.0, "completions/mean_length": 153.5178680419922, "completions/mean_terminated_length": 153.5178680419922, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.507282555103302, "epoch": 7.0479452054794525, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.163936409889693, "kl": 1.040362000465393, "learning_rate": 3.2397260273972597e-07, "loss": -0.0298, "num_tokens": 11718260.0, "reward": 0.6753307580947876, "reward_std": 0.14688406884670258, "rewards/check_gptzero_func/mean": 0.6753306984901428, "rewards/check_gptzero_func/std": 0.4120730757713318, "sampling/importance_sampling_ratio/max": 1.281414270401001, "sampling/importance_sampling_ratio/mean": 0.9994912147521973, "sampling/importance_sampling_ratio/min": 0.7418438792228699, "sampling/sampling_logp_difference/max": 0.2986164093017578, "sampling/sampling_logp_difference/mean": 0.01594066061079502, "step": 1029 }, { "clip_ratio/high_max": 0.02454308047890663, "clip_ratio/high_mean": 0.00826360285282135, "clip_ratio/low_mean": 0.009039899334311485, "clip_ratio/low_min": 0.0042372881434857845, "clip_ratio/region_mean": 0.017303502187132835, "entropy": 0.5091204047203064, "epoch": 7.054794520547945, "grad_norm": 1.794488716638002, "kl": 0.9581269025802612, "learning_rate": 3.238013698630137e-07, "loss": -0.0405, "step": 1030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 827.0, "completions/max_terminated_length": 827.0, "completions/mean_length": 83.21428680419922, "completions/mean_terminated_length": 83.21428680419922, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.14105531573295593, "epoch": 7.061643835616438, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 1.906002000730068, "kl": 1.0231103897094727, "learning_rate": 3.236301369863014e-07, "loss": 0.0152, "num_tokens": 11727418.0, "reward": 0.9429746270179749, "reward_std": 0.014917795546352863, "rewards/check_gptzero_func/mean": 0.9429745674133301, "rewards/check_gptzero_func/std": 0.1584886759519577, "sampling/importance_sampling_ratio/max": 1.4293533563613892, "sampling/importance_sampling_ratio/mean": 1.0003455877304077, "sampling/importance_sampling_ratio/min": 0.7780959010124207, "sampling/sampling_logp_difference/max": 0.3572220802307129, "sampling/sampling_logp_difference/mean": 0.004746598191559315, "step": 1031 }, { "clip_ratio/high_max": 0.02723146788775921, "clip_ratio/high_mean": 0.0052121905609965324, "clip_ratio/low_mean": 0.004116846714168787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009329036809504032, "entropy": 0.14127962291240692, "epoch": 7.068493150684931, "grad_norm": 1.4096594186511797, "kl": 1.0078706741333008, "learning_rate": 3.23458904109589e-07, "loss": 0.0073, "step": 1032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 103.0714340209961, "completions/mean_terminated_length": 103.0714340209961, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.2948887348175049, "epoch": 7.075342465753424, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 2.0616662032771966, "kl": 0.9594631195068359, "learning_rate": 3.2328767123287673e-07, "loss": 0.0659, "num_tokens": 11737780.0, "reward": 0.8366848230361938, "reward_std": 0.084529809653759, "rewards/check_gptzero_func/mean": 0.8366847634315491, "rewards/check_gptzero_func/std": 0.32413023710250854, "sampling/importance_sampling_ratio/max": 1.3313391208648682, "sampling/importance_sampling_ratio/mean": 1.00080144405365, "sampling/importance_sampling_ratio/min": 0.7445473670959473, "sampling/sampling_logp_difference/max": 0.2949788570404053, "sampling/sampling_logp_difference/mean": 0.009401079267263412, "step": 1033 }, { "clip_ratio/high_max": 0.024025386199355125, "clip_ratio/high_mean": 0.006723697297275066, "clip_ratio/low_mean": 0.007503698579967022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.014227395877242088, "entropy": 0.29430022835731506, "epoch": 7.082191780821918, "grad_norm": 1.6351299213435735, "kl": 0.9488458633422852, "learning_rate": 3.231164383561644e-07, "loss": 0.0556, "step": 1034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 174.83929443359375, "completions/mean_terminated_length": 174.83929443359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5135571360588074, "epoch": 7.089041095890411, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9824342664923535, "kl": 0.8748860359191895, "learning_rate": 3.2294520547945203e-07, "loss": 0.0081, "num_tokens": 11752229.0, "reward": 0.8127535581588745, "reward_std": 0.08242079615592957, "rewards/check_gptzero_func/mean": 0.8127535581588745, "rewards/check_gptzero_func/std": 0.3295447528362274, "sampling/importance_sampling_ratio/max": 1.4861363172531128, "sampling/importance_sampling_ratio/mean": 0.9997764825820923, "sampling/importance_sampling_ratio/min": 0.6934140920639038, "sampling/sampling_logp_difference/max": 0.3961796760559082, "sampling/sampling_logp_difference/mean": 0.01571512781083584, "step": 1035 }, { "clip_ratio/high_max": 0.017195262014865875, "clip_ratio/high_mean": 0.008578313514590263, "clip_ratio/low_mean": 0.008868152275681496, "clip_ratio/low_min": 0.0017152659129351377, "clip_ratio/region_mean": 0.01744646579027176, "entropy": 0.5146031379699707, "epoch": 7.095890410958904, "grad_norm": 1.671840661418427, "kl": 0.8503050208091736, "learning_rate": 3.227739726027397e-07, "loss": -0.0026, "step": 1036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 495.0, "completions/max_terminated_length": 495.0, "completions/mean_length": 87.60714721679688, "completions/mean_terminated_length": 87.60714721679688, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.2338215559720993, "epoch": 7.102739726027397, "frac_reward_zero_std": 0.3571428656578064, "grad_norm": 1.5166679844380382, "kl": 0.8849712014198303, "learning_rate": 3.226027397260274e-07, "loss": 0.0775, "num_tokens": 11762071.0, "reward": 0.8982133865356445, "reward_std": 0.04079347103834152, "rewards/check_gptzero_func/mean": 0.8982133269309998, "rewards/check_gptzero_func/std": 0.24412143230438232, "sampling/importance_sampling_ratio/max": 1.5704655647277832, "sampling/importance_sampling_ratio/mean": 1.0001217126846313, "sampling/importance_sampling_ratio/min": 0.6960035562515259, "sampling/sampling_logp_difference/max": 0.4513721466064453, "sampling/sampling_logp_difference/mean": 0.0065355910919606686, "step": 1037 }, { "clip_ratio/high_max": 0.015682656317949295, "clip_ratio/high_mean": 0.004585965536534786, "clip_ratio/low_mean": 0.008063344284892082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012649310752749443, "entropy": 0.23594090342521667, "epoch": 7.109589041095891, "grad_norm": 1.178804620421541, "kl": 0.8866887092590332, "learning_rate": 3.2243150684931503e-07, "loss": 0.0708, "step": 1038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1625.0, "completions/max_terminated_length": 1625.0, "completions/mean_length": 112.01786041259766, "completions/mean_terminated_length": 112.01786041259766, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.2255767583847046, "epoch": 7.116438356164384, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 2.1114896539177286, "kl": 0.8921633958816528, "learning_rate": 3.2226027397260274e-07, "loss": -0.0619, "num_tokens": 11773258.0, "reward": 0.9164789319038391, "reward_std": 0.0636201724410057, "rewards/check_gptzero_func/mean": 0.9164789319038391, "rewards/check_gptzero_func/std": 0.23584571480751038, "sampling/importance_sampling_ratio/max": 1.4806780815124512, "sampling/importance_sampling_ratio/mean": 1.000433087348938, "sampling/importance_sampling_ratio/min": 0.6417651176452637, "sampling/sampling_logp_difference/max": 0.44353294372558594, "sampling/sampling_logp_difference/mean": 0.0067974780686199665, "step": 1039 }, { "clip_ratio/high_max": 0.03196803107857704, "clip_ratio/high_mean": 0.006424069870263338, "clip_ratio/low_mean": 0.007043220102787018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013467290438711643, "entropy": 0.2282886803150177, "epoch": 7.123287671232877, "grad_norm": 1.796562434373438, "kl": 0.8856070637702942, "learning_rate": 3.220890410958904e-07, "loss": -0.0689, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 204.17857360839844, "completions/mean_terminated_length": 204.17857360839844, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.36776214838027954, "epoch": 7.13013698630137, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.9097096650534664, "kl": 0.7069388628005981, "learning_rate": 3.219178082191781e-07, "loss": 0.073, "num_tokens": 11789594.0, "reward": 0.8472759127616882, "reward_std": 0.08696217834949493, "rewards/check_gptzero_func/mean": 0.8472758531570435, "rewards/check_gptzero_func/std": 0.2819051146507263, "sampling/importance_sampling_ratio/max": 1.6273653507232666, "sampling/importance_sampling_ratio/mean": 1.0001857280731201, "sampling/importance_sampling_ratio/min": 0.5381271839141846, "sampling/sampling_logp_difference/max": 0.6196603775024414, "sampling/sampling_logp_difference/mean": 0.012759902514517307, "step": 1041 }, { "clip_ratio/high_max": 0.011512134224176407, "clip_ratio/high_mean": 0.006055272649973631, "clip_ratio/low_mean": 0.006438483949750662, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012493757531046867, "entropy": 0.3676012456417084, "epoch": 7.136986301369863, "grad_norm": 1.57406069686775, "kl": 0.6732268929481506, "learning_rate": 3.2174657534246574e-07, "loss": 0.0638, "step": 1042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 125.23214721679688, "completions/mean_terminated_length": 125.23214721679688, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.1908203810453415, "epoch": 7.1438356164383565, "frac_reward_zero_std": 0.3571428656578064, "grad_norm": 1.8422214196201616, "kl": 0.7614781260490417, "learning_rate": 3.215753424657534e-07, "loss": -0.0003, "num_tokens": 11801545.0, "reward": 0.8565126657485962, "reward_std": 0.017134912312030792, "rewards/check_gptzero_func/mean": 0.8565126061439514, "rewards/check_gptzero_func/std": 0.30611351132392883, "sampling/importance_sampling_ratio/max": 1.3975932598114014, "sampling/importance_sampling_ratio/mean": 1.000015139579773, "sampling/importance_sampling_ratio/min": 0.7253901362419128, "sampling/sampling_logp_difference/max": 0.33475160598754883, "sampling/sampling_logp_difference/mean": 0.0067919534631073475, "step": 1043 }, { "clip_ratio/high_max": 0.02522153966128826, "clip_ratio/high_mean": 0.006961158476769924, "clip_ratio/low_mean": 0.005178662482649088, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012139820493757725, "entropy": 0.1913614720106125, "epoch": 7.1506849315068495, "grad_norm": 1.4226514581244611, "kl": 0.7566304206848145, "learning_rate": 3.214041095890411e-07, "loss": -0.0092, "step": 1044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1069.0, "completions/max_terminated_length": 1069.0, "completions/mean_length": 94.4464340209961, "completions/mean_terminated_length": 94.4464340209961, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.21962860226631165, "epoch": 7.157534246575342, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.9698560000069782, "kl": 1.1431770324707031, "learning_rate": 3.2123287671232874e-07, "loss": 0.0175, "num_tokens": 11811444.0, "reward": 0.9192683696746826, "reward_std": 0.028573447838425636, "rewards/check_gptzero_func/mean": 0.9192683100700378, "rewards/check_gptzero_func/std": 0.21501131355762482, "sampling/importance_sampling_ratio/max": 1.2862757444381714, "sampling/importance_sampling_ratio/mean": 0.9999254941940308, "sampling/importance_sampling_ratio/min": 0.6337958574295044, "sampling/sampling_logp_difference/max": 0.45602846145629883, "sampling/sampling_logp_difference/mean": 0.006319758947938681, "step": 1045 }, { "clip_ratio/high_max": 0.024758707731962204, "clip_ratio/high_mean": 0.006807568948715925, "clip_ratio/low_mean": 0.00544436601921916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012251934967935085, "entropy": 0.22013898193836212, "epoch": 7.164383561643835, "grad_norm": 1.5739597090437165, "kl": 1.1394789218902588, "learning_rate": 3.210616438356164e-07, "loss": 0.0079, "step": 1046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1220.0, "completions/max_terminated_length": 1220.0, "completions/mean_length": 125.60714721679688, "completions/mean_terminated_length": 125.60714721679688, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.29183417558670044, "epoch": 7.171232876712328, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7989414174856702, "kl": 1.1725139617919922, "learning_rate": 3.2089041095890404e-07, "loss": 0.035, "num_tokens": 11823572.0, "reward": 0.9359339475631714, "reward_std": 0.0400620698928833, "rewards/check_gptzero_func/mean": 0.9359339475631714, "rewards/check_gptzero_func/std": 0.18026471138000488, "sampling/importance_sampling_ratio/max": 1.6242321729660034, "sampling/importance_sampling_ratio/mean": 1.0001622438430786, "sampling/importance_sampling_ratio/min": 0.7321471571922302, "sampling/sampling_logp_difference/max": 0.4850351810455322, "sampling/sampling_logp_difference/mean": 0.008629345335066319, "step": 1047 }, { "clip_ratio/high_max": 0.018018018454313278, "clip_ratio/high_mean": 0.00602801563218236, "clip_ratio/low_mean": 0.004825984593480825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010854000225663185, "entropy": 0.29441705346107483, "epoch": 7.178082191780822, "grad_norm": 1.5032547136207326, "kl": 1.1585713624954224, "learning_rate": 3.207191780821918e-07, "loss": 0.026, "step": 1048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 218.87501525878906, "completions/mean_terminated_length": 218.87501525878906, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.36032262444496155, "epoch": 7.184931506849315, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.6951364918503296, "kl": 0.8316494822502136, "learning_rate": 3.2054794520547945e-07, "loss": 0.0502, "num_tokens": 11840317.0, "reward": 0.8842892646789551, "reward_std": 0.022721298038959503, "rewards/check_gptzero_func/mean": 0.8842892050743103, "rewards/check_gptzero_func/std": 0.23854005336761475, "sampling/importance_sampling_ratio/max": 1.5972381830215454, "sampling/importance_sampling_ratio/mean": 0.9997307658195496, "sampling/importance_sampling_ratio/min": 0.6598199009895325, "sampling/sampling_logp_difference/max": 0.4682760238647461, "sampling/sampling_logp_difference/mean": 0.01147258747369051, "step": 1049 }, { "clip_ratio/high_max": 0.01275975164026022, "clip_ratio/high_mean": 0.004787889774888754, "clip_ratio/low_mean": 0.0043677655048668385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009155655279755592, "entropy": 0.3597826659679413, "epoch": 7.191780821917808, "grad_norm": 1.4680635714238788, "kl": 0.8383938670158386, "learning_rate": 3.203767123287671e-07, "loss": 0.041, "step": 1050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 238.0535888671875, "completions/mean_terminated_length": 238.0535888671875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.41051608324050903, "epoch": 7.198630136986301, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.6674025050689882, "kl": 0.7221194505691528, "learning_rate": 3.202054794520548e-07, "loss": -0.0281, "num_tokens": 11858814.0, "reward": 0.8065043687820435, "reward_std": 0.03909733518958092, "rewards/check_gptzero_func/mean": 0.8065043091773987, "rewards/check_gptzero_func/std": 0.33821284770965576, "sampling/importance_sampling_ratio/max": 1.6208345890045166, "sampling/importance_sampling_ratio/mean": 1.0002212524414062, "sampling/importance_sampling_ratio/min": 0.6942037343978882, "sampling/sampling_logp_difference/max": 0.4829411506652832, "sampling/sampling_logp_difference/mean": 0.012337238527834415, "step": 1051 }, { "clip_ratio/high_max": 0.011941448785364628, "clip_ratio/high_mean": 0.006334858480840921, "clip_ratio/low_mean": 0.0031691272743046284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00950398575514555, "entropy": 0.41048336029052734, "epoch": 7.205479452054795, "grad_norm": 1.4804103704042317, "kl": 0.7111905217170715, "learning_rate": 3.2003424657534246e-07, "loss": -0.0372, "step": 1052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 931.0, "completions/max_terminated_length": 931.0, "completions/mean_length": 142.25, "completions/mean_terminated_length": 142.25, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 0.3456489145755768, "epoch": 7.212328767123288, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7512379879948883, "kl": 1.177121877670288, "learning_rate": 3.198630136986301e-07, "loss": 0.1299, "num_tokens": 11871630.0, "reward": 0.8647843599319458, "reward_std": 0.1009923592209816, "rewards/check_gptzero_func/mean": 0.8647843599319458, "rewards/check_gptzero_func/std": 0.2837805151939392, "sampling/importance_sampling_ratio/max": 1.5192279815673828, "sampling/importance_sampling_ratio/mean": 1.0001834630966187, "sampling/importance_sampling_ratio/min": 0.6159943342208862, "sampling/sampling_logp_difference/max": 0.48451757431030273, "sampling/sampling_logp_difference/mean": 0.010677597485482693, "step": 1053 }, { "clip_ratio/high_max": 0.015762463212013245, "clip_ratio/high_mean": 0.004723318386822939, "clip_ratio/low_mean": 0.005391963757574558, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010115282610058784, "entropy": 0.34608015418052673, "epoch": 7.219178082191781, "grad_norm": 1.481030067410224, "kl": 1.1868524551391602, "learning_rate": 3.1969178082191776e-07, "loss": 0.1207, "step": 1054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 282.3035888671875, "completions/mean_terminated_length": 181.6481475830078, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.4915390908718109, "epoch": 7.226027397260274, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.4069118838994905, "kl": 0.6375256776809692, "learning_rate": 3.1952054794520546e-07, "loss": 0.0291, "num_tokens": 11892207.0, "reward": 0.7851560711860657, "reward_std": 0.022632909938693047, "rewards/check_gptzero_func/mean": 0.7851560711860657, "rewards/check_gptzero_func/std": 0.35008952021598816, "sampling/importance_sampling_ratio/max": 1.8540674448013306, "sampling/importance_sampling_ratio/mean": 0.99986332654953, "sampling/importance_sampling_ratio/min": 0.5694338083267212, "sampling/sampling_logp_difference/max": 0.6173818111419678, "sampling/sampling_logp_difference/mean": 0.015591197647154331, "step": 1055 }, { "clip_ratio/high_max": 0.01182654406875372, "clip_ratio/high_mean": 0.006587725132703781, "clip_ratio/low_mean": 0.005276199895888567, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011863925494253635, "entropy": 0.4919980466365814, "epoch": 7.232876712328767, "grad_norm": 1.2300321010544069, "kl": 0.6249866485595703, "learning_rate": 3.1934931506849316e-07, "loss": 0.0226, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1137.0, "completions/max_terminated_length": 1137.0, "completions/mean_length": 160.75, "completions/mean_terminated_length": 160.75, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.3255602717399597, "epoch": 7.239726027397261, "frac_reward_zero_std": 0.25, "grad_norm": 1.7837912448973918, "kl": 0.8920577764511108, "learning_rate": 3.191780821917808e-07, "loss": -0.055, "num_tokens": 11906481.0, "reward": 0.8894706964492798, "reward_std": 0.06374108791351318, "rewards/check_gptzero_func/mean": 0.889470636844635, "rewards/check_gptzero_func/std": 0.2411511242389679, "sampling/importance_sampling_ratio/max": 1.7324894666671753, "sampling/importance_sampling_ratio/mean": 0.9998868107795715, "sampling/importance_sampling_ratio/min": 0.712352454662323, "sampling/sampling_logp_difference/max": 0.5495593547821045, "sampling/sampling_logp_difference/mean": 0.010499064810574055, "step": 1057 }, { "clip_ratio/high_max": 0.017559658735990524, "clip_ratio/high_mean": 0.0068665980361402035, "clip_ratio/low_mean": 0.003713486948981881, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010580085217952728, "entropy": 0.32660147547721863, "epoch": 7.2465753424657535, "grad_norm": 1.5752507248017682, "kl": 0.890093207359314, "learning_rate": 3.190068493150685e-07, "loss": -0.0639, "step": 1058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 112.71429443359375, "completions/mean_terminated_length": 112.71429443359375, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.19346046447753906, "epoch": 7.2534246575342465, "frac_reward_zero_std": 0.3214285969734192, "grad_norm": 2.013328402680916, "kl": 0.8976244926452637, "learning_rate": 3.1883561643835617e-07, "loss": 0.0191, "num_tokens": 11917505.0, "reward": 0.8728259801864624, "reward_std": 0.03465884178876877, "rewards/check_gptzero_func/mean": 0.8728259205818176, "rewards/check_gptzero_func/std": 0.2790895104408264, "sampling/importance_sampling_ratio/max": 1.2810611724853516, "sampling/importance_sampling_ratio/mean": 1.000105381011963, "sampling/importance_sampling_ratio/min": 0.7388675212860107, "sampling/sampling_logp_difference/max": 0.30263662338256836, "sampling/sampling_logp_difference/mean": 0.0057753329165279865, "step": 1059 }, { "clip_ratio/high_max": 0.021947449073195457, "clip_ratio/high_mean": 0.00630573183298111, "clip_ratio/low_mean": 0.0038132560439407825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01011898834258318, "entropy": 0.1944977194070816, "epoch": 7.260273972602739, "grad_norm": 1.690172587524088, "kl": 0.8957303762435913, "learning_rate": 3.186643835616438e-07, "loss": 0.0098, "step": 1060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 790.0, "completions/max_terminated_length": 790.0, "completions/mean_length": 91.62500762939453, "completions/mean_terminated_length": 91.62500762939453, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 0.1981586366891861, "epoch": 7.267123287671233, "frac_reward_zero_std": 0.25, "grad_norm": 1.7711519798522737, "kl": 1.0328514575958252, "learning_rate": 3.1849315068493147e-07, "loss": 0.0412, "num_tokens": 11927336.0, "reward": 0.9336236715316772, "reward_std": 0.04545139521360397, "rewards/check_gptzero_func/mean": 0.9336236119270325, "rewards/check_gptzero_func/std": 0.17824161052703857, "sampling/importance_sampling_ratio/max": 1.2873997688293457, "sampling/importance_sampling_ratio/mean": 0.9997330904006958, "sampling/importance_sampling_ratio/min": 0.7916769981384277, "sampling/sampling_logp_difference/max": 0.25262451171875, "sampling/sampling_logp_difference/mean": 0.006152071990072727, "step": 1061 }, { "clip_ratio/high_max": 0.019587082788348198, "clip_ratio/high_mean": 0.004720047581940889, "clip_ratio/low_mean": 0.006035127677023411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010755174793303013, "entropy": 0.1968890279531479, "epoch": 7.273972602739726, "grad_norm": 1.3828327306824126, "kl": 1.034812331199646, "learning_rate": 3.1832191780821917e-07, "loss": 0.0332, "step": 1062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1760.0, "completions/max_terminated_length": 1760.0, "completions/mean_length": 152.3928680419922, "completions/mean_terminated_length": 152.3928680419922, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.3603824973106384, "epoch": 7.280821917808219, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.9128243650445542, "kl": 0.7695834040641785, "learning_rate": 3.181506849315068e-07, "loss": -0.0199, "num_tokens": 11940788.0, "reward": 0.8008801937103271, "reward_std": 0.0990741103887558, "rewards/check_gptzero_func/mean": 0.8008801341056824, "rewards/check_gptzero_func/std": 0.3268600404262543, "sampling/importance_sampling_ratio/max": 1.436768651008606, "sampling/importance_sampling_ratio/mean": 0.9995280504226685, "sampling/importance_sampling_ratio/min": 0.6201941967010498, "sampling/sampling_logp_difference/max": 0.4777226448059082, "sampling/sampling_logp_difference/mean": 0.00983391422778368, "step": 1063 }, { "clip_ratio/high_max": 0.02016645297408104, "clip_ratio/high_mean": 0.005939285736531019, "clip_ratio/low_mean": 0.00369249633513391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00963178277015686, "entropy": 0.3619785010814667, "epoch": 7.287671232876712, "grad_norm": 1.609159823034312, "kl": 0.7678547501564026, "learning_rate": 3.1797945205479447e-07, "loss": -0.0295, "step": 1064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 185.37501525878906, "completions/mean_terminated_length": 185.37501525878906, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.3591359555721283, "epoch": 7.294520547945205, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 1.7609697966603153, "kl": 0.8704639673233032, "learning_rate": 3.1780821917808223e-07, "loss": 0.0885, "num_tokens": 11955913.0, "reward": 0.9111876487731934, "reward_std": 0.019192306324839592, "rewards/check_gptzero_func/mean": 0.9111875891685486, "rewards/check_gptzero_func/std": 0.18480980396270752, "sampling/importance_sampling_ratio/max": 1.6280443668365479, "sampling/importance_sampling_ratio/mean": 0.9995050430297852, "sampling/importance_sampling_ratio/min": 0.6438544392585754, "sampling/sampling_logp_difference/max": 0.4873795509338379, "sampling/sampling_logp_difference/mean": 0.010906356386840343, "step": 1065 }, { "clip_ratio/high_max": 0.01649276353418827, "clip_ratio/high_mean": 0.006679147481918335, "clip_ratio/low_mean": 0.0049735913053154945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01165273878723383, "entropy": 0.35984542965888977, "epoch": 7.301369863013699, "grad_norm": 1.4995980527222232, "kl": 0.8724832534790039, "learning_rate": 3.176369863013699e-07, "loss": 0.0794, "step": 1066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 129.125, "completions/mean_terminated_length": 129.125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.2432529181241989, "epoch": 7.308219178082192, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.5589891737035002, "kl": 0.8298550844192505, "learning_rate": 3.1746575342465753e-07, "loss": 0.0005, "num_tokens": 11968676.0, "reward": 0.9444734454154968, "reward_std": 0.04360417649149895, "rewards/check_gptzero_func/mean": 0.944473385810852, "rewards/check_gptzero_func/std": 0.1632794439792633, "sampling/importance_sampling_ratio/max": 1.5562819242477417, "sampling/importance_sampling_ratio/mean": 1.0000041723251343, "sampling/importance_sampling_ratio/min": 0.7263818383216858, "sampling/sampling_logp_difference/max": 0.44229960441589355, "sampling/sampling_logp_difference/mean": 0.006376274861395359, "step": 1067 }, { "clip_ratio/high_max": 0.021276595070958138, "clip_ratio/high_mean": 0.0030395134817808867, "clip_ratio/low_mean": 0.0029930658638477325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006032579578459263, "entropy": 0.2447921484708786, "epoch": 7.315068493150685, "grad_norm": 1.2785326475882508, "kl": 0.8158043622970581, "learning_rate": 3.172945205479452e-07, "loss": -0.0067, "step": 1068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 905.0, "completions/max_terminated_length": 905.0, "completions/mean_length": 113.64286041259766, "completions/mean_terminated_length": 113.64286041259766, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.3220570981502533, "epoch": 7.321917808219178, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 1.955740682533006, "kl": 0.8300118446350098, "learning_rate": 3.171232876712329e-07, "loss": -0.0248, "num_tokens": 11979706.0, "reward": 0.9328921437263489, "reward_std": 0.025460010394454002, "rewards/check_gptzero_func/mean": 0.9328921437263489, "rewards/check_gptzero_func/std": 0.205184206366539, "sampling/importance_sampling_ratio/max": 1.234269380569458, "sampling/importance_sampling_ratio/mean": 0.9996713995933533, "sampling/importance_sampling_ratio/min": 0.7339167594909668, "sampling/sampling_logp_difference/max": 0.30935966968536377, "sampling/sampling_logp_difference/mean": 0.007776533253490925, "step": 1069 }, { "clip_ratio/high_max": 0.022103970870375633, "clip_ratio/high_mean": 0.006169219966977835, "clip_ratio/low_mean": 0.004644714295864105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010813934728503227, "entropy": 0.323364794254303, "epoch": 7.328767123287671, "grad_norm": 1.7004182791819626, "kl": 0.8250513076782227, "learning_rate": 3.1695205479452053e-07, "loss": -0.0336, "step": 1070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1790.0, "completions/max_terminated_length": 1790.0, "completions/mean_length": 148.0357208251953, "completions/mean_terminated_length": 148.0357208251953, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.27303338050842285, "epoch": 7.335616438356165, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 1.926543632204306, "kl": 0.8449307084083557, "learning_rate": 3.167808219178082e-07, "loss": -0.0305, "num_tokens": 11992742.0, "reward": 0.8695080280303955, "reward_std": 0.04762103036046028, "rewards/check_gptzero_func/mean": 0.8695080876350403, "rewards/check_gptzero_func/std": 0.2948904037475586, "sampling/importance_sampling_ratio/max": 1.4660520553588867, "sampling/importance_sampling_ratio/mean": 1.000091314315796, "sampling/importance_sampling_ratio/min": 0.6816991567611694, "sampling/sampling_logp_difference/max": 0.3831667900085449, "sampling/sampling_logp_difference/mean": 0.008475549519062042, "step": 1071 }, { "clip_ratio/high_max": 0.020140372216701508, "clip_ratio/high_mean": 0.0053822947666049, "clip_ratio/low_mean": 0.007711456157267094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01309374999254942, "entropy": 0.2727763056755066, "epoch": 7.342465753424658, "grad_norm": 1.6238054982689936, "kl": 0.8474336266517639, "learning_rate": 3.1660958904109583e-07, "loss": -0.0407, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 851.0, "completions/max_terminated_length": 851.0, "completions/mean_length": 98.33928680419922, "completions/mean_terminated_length": 98.33928680419922, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.2140590399503708, "epoch": 7.3493150684931505, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.6964807040852294, "kl": 0.7154012322425842, "learning_rate": 3.1643835616438354e-07, "loss": 0.1392, "num_tokens": 12002925.0, "reward": 0.8374601602554321, "reward_std": 0.02045154944062233, "rewards/check_gptzero_func/mean": 0.8374601006507874, "rewards/check_gptzero_func/std": 0.32574644684791565, "sampling/importance_sampling_ratio/max": 1.288321614265442, "sampling/importance_sampling_ratio/mean": 1.0000026226043701, "sampling/importance_sampling_ratio/min": 0.6484401822090149, "sampling/sampling_logp_difference/max": 0.4331855773925781, "sampling/sampling_logp_difference/mean": 0.006677263416349888, "step": 1073 }, { "clip_ratio/high_max": 0.02673267386853695, "clip_ratio/high_mean": 0.006424210965633392, "clip_ratio/low_mean": 0.008325029164552689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.014749239198863506, "entropy": 0.21533329784870148, "epoch": 7.3561643835616435, "grad_norm": 1.3842457566362971, "kl": 0.711950421333313, "learning_rate": 3.1626712328767124e-07, "loss": 0.1313, "step": 1074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1673.0, "completions/max_terminated_length": 1673.0, "completions/mean_length": 232.85714721679688, "completions/mean_terminated_length": 232.85714721679688, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.3712368905544281, "epoch": 7.363013698630137, "frac_reward_zero_std": 0.25, "grad_norm": 2.283279170109953, "kl": 0.9730749726295471, "learning_rate": 3.160958904109589e-07, "loss": -0.004, "num_tokens": 12020569.0, "reward": 0.8520470261573792, "reward_std": 0.06416142731904984, "rewards/check_gptzero_func/mean": 0.8520469665527344, "rewards/check_gptzero_func/std": 0.27503731846809387, "sampling/importance_sampling_ratio/max": 1.5454834699630737, "sampling/importance_sampling_ratio/mean": 1.0000687837600708, "sampling/importance_sampling_ratio/min": 0.5363040566444397, "sampling/sampling_logp_difference/max": 0.623054027557373, "sampling/sampling_logp_difference/mean": 0.01131819561123848, "step": 1075 }, { "clip_ratio/high_max": 0.012696041725575924, "clip_ratio/high_mean": 0.005628228187561035, "clip_ratio/low_mean": 0.002237310167402029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007865538820624352, "entropy": 0.37328487634658813, "epoch": 7.36986301369863, "grad_norm": 1.4533118278797552, "kl": 0.9216662645339966, "learning_rate": 3.159246575342466e-07, "loss": -0.0123, "step": 1076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 162.23214721679688, "completions/mean_terminated_length": 162.23214721679688, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.3009152114391327, "epoch": 7.376712328767123, "frac_reward_zero_std": 0.25, "grad_norm": 1.7807797561581027, "kl": 0.5811677575111389, "learning_rate": 3.1575342465753424e-07, "loss": 0.0272, "num_tokens": 12034954.0, "reward": 0.9071972966194153, "reward_std": 0.030361713841557503, "rewards/check_gptzero_func/mean": 0.9071972966194153, "rewards/check_gptzero_func/std": 0.23798483610153198, "sampling/importance_sampling_ratio/max": 1.467151403427124, "sampling/importance_sampling_ratio/mean": 1.0002614259719849, "sampling/importance_sampling_ratio/min": 0.7001726031303406, "sampling/sampling_logp_difference/max": 0.38332271575927734, "sampling/sampling_logp_difference/mean": 0.008739674463868141, "step": 1077 }, { "clip_ratio/high_max": 0.016027875244617462, "clip_ratio/high_mean": 0.005960599519312382, "clip_ratio/low_mean": 0.004698034375905991, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010658634826540947, "entropy": 0.30087175965309143, "epoch": 7.383561643835616, "grad_norm": 1.5303042941850784, "kl": 0.5782877802848816, "learning_rate": 3.155821917808219e-07, "loss": 0.0179, "step": 1078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 183.3928680419922, "completions/mean_terminated_length": 183.3928680419922, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.48045679926872253, "epoch": 7.390410958904109, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.868127793361382, "kl": 0.8193210959434509, "learning_rate": 3.1541095890410955e-07, "loss": -0.0058, "num_tokens": 12049858.0, "reward": 0.8307894468307495, "reward_std": 0.0500495545566082, "rewards/check_gptzero_func/mean": 0.8307894468307495, "rewards/check_gptzero_func/std": 0.30374637246131897, "sampling/importance_sampling_ratio/max": 1.5181519985198975, "sampling/importance_sampling_ratio/mean": 1.0000964403152466, "sampling/importance_sampling_ratio/min": 0.6552981734275818, "sampling/sampling_logp_difference/max": 0.4226648807525635, "sampling/sampling_logp_difference/mean": 0.013523179106414318, "step": 1079 }, { "clip_ratio/high_max": 0.013292117044329643, "clip_ratio/high_mean": 0.0062258606776595116, "clip_ratio/low_mean": 0.004068806767463684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010294667445123196, "entropy": 0.48059484362602234, "epoch": 7.397260273972603, "grad_norm": 1.5904819058602937, "kl": 0.7828446626663208, "learning_rate": 3.1523972602739725e-07, "loss": -0.0149, "step": 1080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/max_terminated_length": 213.0, "completions/mean_length": 63.982147216796875, "completions/mean_terminated_length": 63.982147216796875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.1365184634923935, "epoch": 7.404109589041096, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.2898567002577974, "kl": 1.167317509651184, "learning_rate": 3.150684931506849e-07, "loss": 0.0028, "num_tokens": 12058161.0, "reward": 0.9305173754692078, "reward_std": 0.04472803324460983, "rewards/check_gptzero_func/mean": 0.930517315864563, "rewards/check_gptzero_func/std": 0.22879010438919067, "sampling/importance_sampling_ratio/max": 1.1875576972961426, "sampling/importance_sampling_ratio/mean": 1.000422716140747, "sampling/importance_sampling_ratio/min": 0.7870317697525024, "sampling/sampling_logp_difference/max": 0.2394866943359375, "sampling/sampling_logp_difference/mean": 0.0039443885907530785, "step": 1081 }, { "clip_ratio/high_max": 0.03170028701424599, "clip_ratio/high_mean": 0.00818831566721201, "clip_ratio/low_mean": 0.006575849372893572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.014764164574444294, "entropy": 0.13988284766674042, "epoch": 7.410958904109589, "grad_norm": 0.8791946585245487, "kl": 1.1569923162460327, "learning_rate": 3.1489726027397255e-07, "loss": -0.0017, "step": 1082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 995.0, "completions/max_terminated_length": 995.0, "completions/mean_length": 134.2678680419922, "completions/mean_terminated_length": 134.2678680419922, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.31177574396133423, "epoch": 7.417808219178082, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.8612836900894953, "kl": 0.8299530148506165, "learning_rate": 3.147260273972603e-07, "loss": 0.0764, "num_tokens": 12070738.0, "reward": 0.8930847644805908, "reward_std": 0.07654815167188644, "rewards/check_gptzero_func/mean": 0.893084704875946, "rewards/check_gptzero_func/std": 0.24103525280952454, "sampling/importance_sampling_ratio/max": 1.3952044248580933, "sampling/importance_sampling_ratio/mean": 1.0000476837158203, "sampling/importance_sampling_ratio/min": 0.7290370464324951, "sampling/sampling_logp_difference/max": 0.3330409526824951, "sampling/sampling_logp_difference/mean": 0.009749602526426315, "step": 1083 }, { "clip_ratio/high_max": 0.02468007244169712, "clip_ratio/high_mean": 0.00688974792137742, "clip_ratio/low_mean": 0.006150186527520418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013039933517575264, "entropy": 0.3117469251155853, "epoch": 7.424657534246576, "grad_norm": 1.566695698884728, "kl": 0.8342047333717346, "learning_rate": 3.1455479452054796e-07, "loss": 0.0668, "step": 1084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 244.0178680419922, "completions/mean_terminated_length": 244.0178680419922, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.3814590275287628, "epoch": 7.431506849315069, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.5171302553967247, "kl": 0.9564229249954224, "learning_rate": 3.143835616438356e-07, "loss": 0.0341, "num_tokens": 12089241.0, "reward": 0.8823171257972717, "reward_std": 0.0601145476102829, "rewards/check_gptzero_func/mean": 0.882317066192627, "rewards/check_gptzero_func/std": 0.2673826217651367, "sampling/importance_sampling_ratio/max": 1.7182224988937378, "sampling/importance_sampling_ratio/mean": 1.0003279447555542, "sampling/importance_sampling_ratio/min": 0.5258402228355408, "sampling/sampling_logp_difference/max": 0.6427578926086426, "sampling/sampling_logp_difference/mean": 0.011464936658740044, "step": 1085 }, { "clip_ratio/high_max": 0.015446608886122704, "clip_ratio/high_mean": 0.006028418894857168, "clip_ratio/low_mean": 0.003668391378596425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009696810506284237, "entropy": 0.38275566697120667, "epoch": 7.438356164383562, "grad_norm": 1.3343033354732685, "kl": 0.95548015832901, "learning_rate": 3.1421232876712326e-07, "loss": 0.0258, "step": 1086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 937.0, "completions/mean_length": 199.85714721679688, "completions/mean_terminated_length": 96.14814758300781, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "entropy": 0.12567110359668732, "epoch": 7.445205479452055, "frac_reward_zero_std": 0.25, "grad_norm": 0.8868185419186773, "kl": 0.7940899729728699, "learning_rate": 3.1404109589041096e-07, "loss": 0.357, "num_tokens": 12105571.0, "reward": 0.9223061203956604, "reward_std": 0.07454496622085571, "rewards/check_gptzero_func/mean": 0.9223060607910156, "rewards/check_gptzero_func/std": 0.21746741235256195, "sampling/importance_sampling_ratio/max": 1.6065460443496704, "sampling/importance_sampling_ratio/mean": 1.000308871269226, "sampling/importance_sampling_ratio/min": 0.7725920081138611, "sampling/sampling_logp_difference/max": 0.4740865230560303, "sampling/sampling_logp_difference/mean": 0.003490953240543604, "step": 1087 }, { "clip_ratio/high_max": 0.013888888992369175, "clip_ratio/high_mean": 0.003658152651041746, "clip_ratio/low_mean": 0.001955486135557294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005613639019429684, "entropy": 0.12567773461341858, "epoch": 7.4520547945205475, "grad_norm": 0.6775710743493149, "kl": 0.7864994406700134, "learning_rate": 3.138698630136986e-07, "loss": 0.3532, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1048.0, "completions/max_terminated_length": 1048.0, "completions/mean_length": 105.33928680419922, "completions/mean_terminated_length": 105.33928680419922, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.23797304928302765, "epoch": 7.458904109589041, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.860506100749319, "kl": 1.1797635555267334, "learning_rate": 3.1369863013698626e-07, "loss": 0.0153, "num_tokens": 12116474.0, "reward": 0.9500237703323364, "reward_std": 0.04425344988703728, "rewards/check_gptzero_func/mean": 0.9500237107276917, "rewards/check_gptzero_func/std": 0.1657572090625763, "sampling/importance_sampling_ratio/max": 1.3316152095794678, "sampling/importance_sampling_ratio/mean": 1.000294804573059, "sampling/importance_sampling_ratio/min": 0.7728429436683655, "sampling/sampling_logp_difference/max": 0.2863926887512207, "sampling/sampling_logp_difference/mean": 0.00713144987821579, "step": 1089 }, { "clip_ratio/high_max": 0.025641025975346565, "clip_ratio/high_mean": 0.0045618535950779915, "clip_ratio/low_mean": 0.004327605944126844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008889460004866123, "entropy": 0.238152414560318, "epoch": 7.465753424657534, "grad_norm": 1.503780894850397, "kl": 1.1265313625335693, "learning_rate": 3.1352739726027396e-07, "loss": 0.0067, "step": 1090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1583.0, "completions/max_terminated_length": 1583.0, "completions/mean_length": 165.625, "completions/mean_terminated_length": 165.625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.3901457190513611, "epoch": 7.472602739726027, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8692664658001639, "kl": 0.8026970624923706, "learning_rate": 3.133561643835616e-07, "loss": 0.0602, "num_tokens": 12130405.0, "reward": 0.8602096438407898, "reward_std": 0.06193319708108902, "rewards/check_gptzero_func/mean": 0.860209584236145, "rewards/check_gptzero_func/std": 0.2807546555995941, "sampling/importance_sampling_ratio/max": 1.4030100107192993, "sampling/importance_sampling_ratio/mean": 0.9995079636573792, "sampling/importance_sampling_ratio/min": 0.6042060852050781, "sampling/sampling_logp_difference/max": 0.5038399696350098, "sampling/sampling_logp_difference/mean": 0.011319976300001144, "step": 1091 }, { "clip_ratio/high_max": 0.010948904789984226, "clip_ratio/high_mean": 0.004705849569290876, "clip_ratio/low_mean": 0.006528132129460573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011233980767428875, "entropy": 0.3887055814266205, "epoch": 7.47945205479452, "grad_norm": 1.6423419236975283, "kl": 0.8040481209754944, "learning_rate": 3.131849315068493e-07, "loss": 0.0505, "step": 1092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 170.67857360839844, "completions/mean_terminated_length": 170.67857360839844, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.40043559670448303, "epoch": 7.486301369863014, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.75013826102151, "kl": 0.7368966341018677, "learning_rate": 3.1301369863013697e-07, "loss": -0.0513, "num_tokens": 12144849.0, "reward": 0.8463109135627747, "reward_std": 0.03924552723765373, "rewards/check_gptzero_func/mean": 0.8463109135627747, "rewards/check_gptzero_func/std": 0.29401594400405884, "sampling/importance_sampling_ratio/max": 1.4539958238601685, "sampling/importance_sampling_ratio/mean": 1.000158667564392, "sampling/importance_sampling_ratio/min": 0.6939412951469421, "sampling/sampling_logp_difference/max": 0.3743155002593994, "sampling/sampling_logp_difference/mean": 0.012042208574712276, "step": 1093 }, { "clip_ratio/high_max": 0.021645022556185722, "clip_ratio/high_mean": 0.009069222025573254, "clip_ratio/low_mean": 0.004383661784231663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013452885672450066, "entropy": 0.4027894139289856, "epoch": 7.493150684931507, "grad_norm": 1.519566213960819, "kl": 0.7284137010574341, "learning_rate": 3.1284246575342467e-07, "loss": -0.0606, "step": 1094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 172.2857208251953, "completions/mean_terminated_length": 172.2857208251953, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.3730085790157318, "epoch": 7.5, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.7946901620727485, "kl": 0.8020175099372864, "learning_rate": 3.126712328767123e-07, "loss": 0.0156, "num_tokens": 12159693.0, "reward": 0.8845510482788086, "reward_std": 0.044246625155210495, "rewards/check_gptzero_func/mean": 0.8845509886741638, "rewards/check_gptzero_func/std": 0.22789493203163147, "sampling/importance_sampling_ratio/max": 1.4058009386062622, "sampling/importance_sampling_ratio/mean": 1.0000170469284058, "sampling/importance_sampling_ratio/min": 0.6816624402999878, "sampling/sampling_logp_difference/max": 0.3832206726074219, "sampling/sampling_logp_difference/mean": 0.009941771626472473, "step": 1095 }, { "clip_ratio/high_max": 0.016038792207837105, "clip_ratio/high_mean": 0.005342650227248669, "clip_ratio/low_mean": 0.004955441690981388, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010298090986907482, "entropy": 0.3729168772697449, "epoch": 7.506849315068493, "grad_norm": 1.5610442705969296, "kl": 0.800896942615509, "learning_rate": 3.1249999999999997e-07, "loss": 0.0066, "step": 1096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1562.0, "completions/max_terminated_length": 1562.0, "completions/mean_length": 106.46428680419922, "completions/mean_terminated_length": 106.46428680419922, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.34395602345466614, "epoch": 7.513698630136986, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.710751915181411, "kl": 0.9004575610160828, "learning_rate": 3.123287671232877e-07, "loss": -0.197, "num_tokens": 12170389.0, "reward": 0.9032120704650879, "reward_std": 0.06586693227291107, "rewards/check_gptzero_func/mean": 0.9032120108604431, "rewards/check_gptzero_func/std": 0.22393080592155457, "sampling/importance_sampling_ratio/max": 1.3013652563095093, "sampling/importance_sampling_ratio/mean": 1.0008031129837036, "sampling/importance_sampling_ratio/min": 0.7818045616149902, "sampling/sampling_logp_difference/max": 0.2634139060974121, "sampling/sampling_logp_difference/mean": 0.007823648862540722, "step": 1097 }, { "clip_ratio/high_max": 0.032822757959365845, "clip_ratio/high_mean": 0.008431805297732353, "clip_ratio/low_mean": 0.0015170451952144504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009948850609362125, "entropy": 0.3456380069255829, "epoch": 7.52054794520548, "grad_norm": 1.5185991196674917, "kl": 0.8974536657333374, "learning_rate": 3.121575342465753e-07, "loss": -0.2052, "step": 1098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 299.58929443359375, "completions/mean_terminated_length": 250.49090576171875, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.45694851875305176, "epoch": 7.527397260273973, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3020397487332807, "kl": 0.7751431465148926, "learning_rate": 3.11986301369863e-07, "loss": -0.1018, "num_tokens": 12192156.0, "reward": 0.8356425762176514, "reward_std": 0.041011761873960495, "rewards/check_gptzero_func/mean": 0.8356425166130066, "rewards/check_gptzero_func/std": 0.29891419410705566, "sampling/importance_sampling_ratio/max": 1.4689127206802368, "sampling/importance_sampling_ratio/mean": 1.0001674890518188, "sampling/importance_sampling_ratio/min": 0.6915788054466248, "sampling/sampling_logp_difference/max": 0.3845224976539612, "sampling/sampling_logp_difference/mean": 0.01255687978118658, "step": 1099 }, { "clip_ratio/high_max": 0.010529891587793827, "clip_ratio/high_mean": 0.004705317318439484, "clip_ratio/low_mean": 0.0029032756574451923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007608593441545963, "entropy": 0.45787641406059265, "epoch": 7.534246575342466, "grad_norm": 1.1918948040667947, "kl": 0.7735207676887512, "learning_rate": 3.1181506849315063e-07, "loss": -0.1089, "step": 1100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1612.0, "completions/max_terminated_length": 1612.0, "completions/mean_length": 171.1607208251953, "completions/mean_terminated_length": 171.1607208251953, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.47710171341896057, "epoch": 7.541095890410959, "frac_reward_zero_std": 0.25, "grad_norm": 1.7762447773301786, "kl": 0.8191900253295898, "learning_rate": 3.116438356164384e-07, "loss": -0.0546, "num_tokens": 12206705.0, "reward": 0.7913175821304321, "reward_std": 0.07258784770965576, "rewards/check_gptzero_func/mean": 0.7913175225257874, "rewards/check_gptzero_func/std": 0.35447216033935547, "sampling/importance_sampling_ratio/max": 1.3710262775421143, "sampling/importance_sampling_ratio/mean": 0.9992140531539917, "sampling/importance_sampling_ratio/min": 0.3925726115703583, "sampling/sampling_logp_difference/max": 0.9350337982177734, "sampling/sampling_logp_difference/mean": 0.013508946634829044, "step": 1101 }, { "clip_ratio/high_max": 0.0129163833335042, "clip_ratio/high_mean": 0.00813362468034029, "clip_ratio/low_mean": 0.004702673759311438, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01283629797399044, "entropy": 0.4778262972831726, "epoch": 7.5479452054794525, "grad_norm": 1.5311741780493433, "kl": 0.8044716119766235, "learning_rate": 3.1147260273972603e-07, "loss": -0.0638, "step": 1102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1997.0, "completions/max_terminated_length": 1997.0, "completions/mean_length": 250.87501525878906, "completions/mean_terminated_length": 250.87501525878906, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.463030070066452, "epoch": 7.554794520547945, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.407853492437863, "kl": 0.8248174786567688, "learning_rate": 3.113013698630137e-07, "loss": 0.1242, "num_tokens": 12225350.0, "reward": 0.8411685824394226, "reward_std": 0.08760982006788254, "rewards/check_gptzero_func/mean": 0.8411685228347778, "rewards/check_gptzero_func/std": 0.3193894922733307, "sampling/importance_sampling_ratio/max": 1.5724027156829834, "sampling/importance_sampling_ratio/mean": 1.0002714395523071, "sampling/importance_sampling_ratio/min": 0.6527242660522461, "sampling/sampling_logp_difference/max": 0.4526047706604004, "sampling/sampling_logp_difference/mean": 0.012692851014435291, "step": 1103 }, { "clip_ratio/high_max": 0.010499523021280766, "clip_ratio/high_mean": 0.004992184694856405, "clip_ratio/low_mean": 0.004495948553085327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00948813371360302, "entropy": 0.46312740445137024, "epoch": 7.561643835616438, "grad_norm": 1.2544697857728453, "kl": 0.8311688303947449, "learning_rate": 3.111301369863014e-07, "loss": 0.1162, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2510.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 236.19644165039062, "completions/mean_terminated_length": 236.19644165039062, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.4194251596927643, "epoch": 7.568493150684931, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.5037881811642482, "kl": 0.6764751672744751, "learning_rate": 3.1095890410958904e-07, "loss": 0.1408, "num_tokens": 12243289.0, "reward": 0.8368136286735535, "reward_std": 0.02697170525789261, "rewards/check_gptzero_func/mean": 0.8368136286735535, "rewards/check_gptzero_func/std": 0.29990580677986145, "sampling/importance_sampling_ratio/max": 1.60359525680542, "sampling/importance_sampling_ratio/mean": 1.0004528760910034, "sampling/importance_sampling_ratio/min": 0.6298264265060425, "sampling/sampling_logp_difference/max": 0.4722480773925781, "sampling/sampling_logp_difference/mean": 0.011335371993482113, "step": 1105 }, { "clip_ratio/high_max": 0.010608486831188202, "clip_ratio/high_mean": 0.0040182084776461124, "clip_ratio/low_mean": 0.003952255938202143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007970464415848255, "entropy": 0.42031970620155334, "epoch": 7.575342465753424, "grad_norm": 1.36104254542272, "kl": 0.6768494248390198, "learning_rate": 3.107876712328767e-07, "loss": 0.1329, "step": 1106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 998.0, "completions/max_terminated_length": 998.0, "completions/mean_length": 117.87500762939453, "completions/mean_terminated_length": 117.87500762939453, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.3338184952735901, "epoch": 7.582191780821918, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.7705955544326648, "kl": 1.0118085145950317, "learning_rate": 3.1061643835616434e-07, "loss": 0.0576, "num_tokens": 12254510.0, "reward": 0.8831028938293457, "reward_std": 0.07523977011442184, "rewards/check_gptzero_func/mean": 0.8831028342247009, "rewards/check_gptzero_func/std": 0.2722071707248688, "sampling/importance_sampling_ratio/max": 1.2745164632797241, "sampling/importance_sampling_ratio/mean": 1.0002343654632568, "sampling/importance_sampling_ratio/min": 0.7226651906967163, "sampling/sampling_logp_difference/max": 0.32480931282043457, "sampling/sampling_logp_difference/mean": 0.009456364437937737, "step": 1107 }, { "clip_ratio/high_max": 0.01481075119227171, "clip_ratio/high_mean": 0.005769948475062847, "clip_ratio/low_mean": 0.006994080264121294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012764028273522854, "entropy": 0.334159791469574, "epoch": 7.589041095890411, "grad_norm": 1.4720217141280658, "kl": 1.007488489151001, "learning_rate": 3.1044520547945204e-07, "loss": 0.049, "step": 1108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 783.0, "completions/max_terminated_length": 783.0, "completions/mean_length": 119.39286041259766, "completions/mean_terminated_length": 119.39286041259766, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.33294084668159485, "epoch": 7.595890410958904, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.7038823639866492, "kl": 0.9656710028648376, "learning_rate": 3.102739726027397e-07, "loss": 0.0649, "num_tokens": 12266492.0, "reward": 0.9459635019302368, "reward_std": 0.0331876277923584, "rewards/check_gptzero_func/mean": 0.945963442325592, "rewards/check_gptzero_func/std": 0.17619767785072327, "sampling/importance_sampling_ratio/max": 1.3812024593353271, "sampling/importance_sampling_ratio/mean": 1.000322699546814, "sampling/importance_sampling_ratio/min": 0.776596188545227, "sampling/sampling_logp_difference/max": 0.3229544162750244, "sampling/sampling_logp_difference/mean": 0.009174666367471218, "step": 1109 }, { "clip_ratio/high_max": 0.017553191632032394, "clip_ratio/high_mean": 0.004752867855131626, "clip_ratio/low_mean": 0.006706885062158108, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011459752917289734, "entropy": 0.3340610861778259, "epoch": 7.602739726027397, "grad_norm": 1.3905562505475622, "kl": 0.9611565470695496, "learning_rate": 3.101027397260274e-07, "loss": 0.0565, "step": 1110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1065.0, "completions/max_terminated_length": 1065.0, "completions/mean_length": 119.53572082519531, "completions/mean_terminated_length": 119.53572082519531, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.3122285306453705, "epoch": 7.609589041095891, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.729632814915532, "kl": 1.0478649139404297, "learning_rate": 3.0993150684931505e-07, "loss": -0.0283, "num_tokens": 12278264.0, "reward": 0.8971388936042786, "reward_std": 0.04691382125020027, "rewards/check_gptzero_func/mean": 0.8971388936042786, "rewards/check_gptzero_func/std": 0.2224290668964386, "sampling/importance_sampling_ratio/max": 1.652424931526184, "sampling/importance_sampling_ratio/mean": 1.000342845916748, "sampling/importance_sampling_ratio/min": 0.6771529316902161, "sampling/sampling_logp_difference/max": 0.5022438764572144, "sampling/sampling_logp_difference/mean": 0.009685957804322243, "step": 1111 }, { "clip_ratio/high_max": 0.022579027339816093, "clip_ratio/high_mean": 0.00639896746724844, "clip_ratio/low_mean": 0.004897404927760363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011296371929347515, "entropy": 0.31283918023109436, "epoch": 7.616438356164384, "grad_norm": 1.4859879520624646, "kl": 1.029057264328003, "learning_rate": 3.0976027397260275e-07, "loss": -0.0371, "step": 1112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1535.0, "completions/max_terminated_length": 1535.0, "completions/mean_length": 230.33929443359375, "completions/mean_terminated_length": 230.33929443359375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.5636811256408691, "epoch": 7.623287671232877, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.6539240015633028, "kl": 0.7294929623603821, "learning_rate": 3.095890410958904e-07, "loss": 0.0455, "num_tokens": 12295909.0, "reward": 0.8533517122268677, "reward_std": 0.055163055658340454, "rewards/check_gptzero_func/mean": 0.8533516526222229, "rewards/check_gptzero_func/std": 0.2913447618484497, "sampling/importance_sampling_ratio/max": 1.3417989015579224, "sampling/importance_sampling_ratio/mean": 0.9999805688858032, "sampling/importance_sampling_ratio/min": 0.6976086497306824, "sampling/sampling_logp_difference/max": 0.36009693145751953, "sampling/sampling_logp_difference/mean": 0.015558164566755295, "step": 1113 }, { "clip_ratio/high_max": 0.011988716199994087, "clip_ratio/high_mean": 0.0046724858693778515, "clip_ratio/low_mean": 0.004016309976577759, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008688795380294323, "entropy": 0.5633760094642639, "epoch": 7.63013698630137, "grad_norm": 1.5060274288413091, "kl": 0.7199903130531311, "learning_rate": 3.0941780821917805e-07, "loss": 0.0371, "step": 1114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1086.0, "completions/mean_length": 179.17857360839844, "completions/mean_terminated_length": 127.89090728759766, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.3021060824394226, "epoch": 7.636986301369863, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.2552110052971999, "kl": 0.9415920972824097, "learning_rate": 3.0924657534246575e-07, "loss": 0.1975, "num_tokens": 12310829.0, "reward": 0.8822661638259888, "reward_std": 0.04802171140909195, "rewards/check_gptzero_func/mean": 0.882266104221344, "rewards/check_gptzero_func/std": 0.26848116517066956, "sampling/importance_sampling_ratio/max": 1.5467331409454346, "sampling/importance_sampling_ratio/mean": 0.9991963505744934, "sampling/importance_sampling_ratio/min": 0.6203761100769043, "sampling/sampling_logp_difference/max": 0.4774293899536133, "sampling/sampling_logp_difference/mean": 0.007935859262943268, "step": 1115 }, { "clip_ratio/high_max": 0.023415422067046165, "clip_ratio/high_mean": 0.006325611378997564, "clip_ratio/low_mean": 0.0032980882097035646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009623698890209198, "entropy": 0.301154226064682, "epoch": 7.6438356164383565, "grad_norm": 1.0421085589122616, "kl": 0.9430093169212341, "learning_rate": 3.090753424657534e-07, "loss": 0.1911, "step": 1116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 161.67857360839844, "completions/mean_terminated_length": 161.67857360839844, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.3746240437030792, "epoch": 7.6506849315068495, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7957204680388306, "kl": 0.850213885307312, "learning_rate": 3.0890410958904105e-07, "loss": 0.0582, "num_tokens": 12324619.0, "reward": 0.8813875317573547, "reward_std": 0.04644147306680679, "rewards/check_gptzero_func/mean": 0.8813875317573547, "rewards/check_gptzero_func/std": 0.23249320685863495, "sampling/importance_sampling_ratio/max": 1.2742605209350586, "sampling/importance_sampling_ratio/mean": 0.9996247887611389, "sampling/importance_sampling_ratio/min": 0.729505181312561, "sampling/sampling_logp_difference/max": 0.3153887987136841, "sampling/sampling_logp_difference/mean": 0.010490123182535172, "step": 1117 }, { "clip_ratio/high_max": 0.01711156778037548, "clip_ratio/high_mean": 0.005949870683252811, "clip_ratio/low_mean": 0.0037980841007083654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009747954085469246, "entropy": 0.3752608895301819, "epoch": 7.657534246575342, "grad_norm": 1.6875802015850394, "kl": 0.8448943495750427, "learning_rate": 3.087328767123287e-07, "loss": 0.0486, "step": 1118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 145.8928680419922, "completions/mean_terminated_length": 145.8928680419922, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.35541003942489624, "epoch": 7.664383561643835, "frac_reward_zero_std": 0.3571428656578064, "grad_norm": 1.6766065540197055, "kl": 0.7592108845710754, "learning_rate": 3.0856164383561646e-07, "loss": 0.0577, "num_tokens": 12338371.0, "reward": 0.9204118251800537, "reward_std": 0.030974047258496284, "rewards/check_gptzero_func/mean": 0.9204117655754089, "rewards/check_gptzero_func/std": 0.20204812288284302, "sampling/importance_sampling_ratio/max": 1.2643444538116455, "sampling/importance_sampling_ratio/mean": 1.0003869533538818, "sampling/importance_sampling_ratio/min": 0.7589983940124512, "sampling/sampling_logp_difference/max": 0.2757556438446045, "sampling/sampling_logp_difference/mean": 0.009436934255063534, "step": 1119 }, { "clip_ratio/high_max": 0.011647254228591919, "clip_ratio/high_mean": 0.0039487374015152454, "clip_ratio/low_mean": 0.004945606924593449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008894344791769981, "entropy": 0.3546393811702728, "epoch": 7.671232876712329, "grad_norm": 1.4851079646157568, "kl": 0.7546426057815552, "learning_rate": 3.083904109589041e-07, "loss": 0.0492, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.0, "completions/max_terminated_length": 380.0, "completions/mean_length": 88.60714721679688, "completions/mean_terminated_length": 88.60714721679688, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.24980460107326508, "epoch": 7.678082191780822, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.2961312885569969, "kl": 0.8417576551437378, "learning_rate": 3.0821917808219176e-07, "loss": 0.0753, "num_tokens": 12348489.0, "reward": 0.9425601959228516, "reward_std": 0.027444329112768173, "rewards/check_gptzero_func/mean": 0.9425601363182068, "rewards/check_gptzero_func/std": 0.22526520490646362, "sampling/importance_sampling_ratio/max": 1.2474557161331177, "sampling/importance_sampling_ratio/mean": 1.0002611875534058, "sampling/importance_sampling_ratio/min": 0.7924268841743469, "sampling/sampling_logp_difference/max": 0.23265504837036133, "sampling/sampling_logp_difference/mean": 0.005834392737597227, "step": 1121 }, { "clip_ratio/high_max": 0.02150537632405758, "clip_ratio/high_mean": 0.004426744766533375, "clip_ratio/low_mean": 0.008003859780728817, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012430603615939617, "entropy": 0.25076133012771606, "epoch": 7.684931506849315, "grad_norm": 1.0177297322440773, "kl": 0.8450151085853577, "learning_rate": 3.0804794520547947e-07, "loss": 0.0695, "step": 1122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 217.23214721679688, "completions/mean_terminated_length": 217.23214721679688, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.49176618456840515, "epoch": 7.691780821917808, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 1.6885264950124417, "kl": 0.9027073979377747, "learning_rate": 3.078767123287671e-07, "loss": 0.0202, "num_tokens": 12365500.0, "reward": 0.8649984002113342, "reward_std": 0.06466367840766907, "rewards/check_gptzero_func/mean": 0.8649982810020447, "rewards/check_gptzero_func/std": 0.24563239514827728, "sampling/importance_sampling_ratio/max": 1.6345354318618774, "sampling/importance_sampling_ratio/mean": 1.000231385231018, "sampling/importance_sampling_ratio/min": 0.53977370262146, "sampling/sampling_logp_difference/max": 0.616605281829834, "sampling/sampling_logp_difference/mean": 0.013502916321158409, "step": 1123 }, { "clip_ratio/high_max": 0.011966584250330925, "clip_ratio/high_mean": 0.004558057524263859, "clip_ratio/low_mean": 0.003836317453533411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008394375443458557, "entropy": 0.49355050921440125, "epoch": 7.698630136986301, "grad_norm": 1.579294645091842, "kl": 0.8973422646522522, "learning_rate": 3.0770547945205477e-07, "loss": 0.0108, "step": 1124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1680.0, "completions/max_terminated_length": 1680.0, "completions/mean_length": 262.8214416503906, "completions/mean_terminated_length": 262.8214416503906, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 0.690902590751648, "epoch": 7.705479452054795, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5894384291640233, "kl": 0.6547810435295105, "learning_rate": 3.075342465753424e-07, "loss": -0.0583, "num_tokens": 12385368.0, "reward": 0.8491286039352417, "reward_std": 0.04356304183602333, "rewards/check_gptzero_func/mean": 0.8491286039352417, "rewards/check_gptzero_func/std": 0.26139551401138306, "sampling/importance_sampling_ratio/max": 1.5043816566467285, "sampling/importance_sampling_ratio/mean": 0.9997866749763489, "sampling/importance_sampling_ratio/min": 0.6190225481987, "sampling/sampling_logp_difference/max": 0.4796135425567627, "sampling/sampling_logp_difference/mean": 0.017501123249530792, "step": 1125 }, { "clip_ratio/high_max": 0.011644154787063599, "clip_ratio/high_mean": 0.00669053802266717, "clip_ratio/low_mean": 0.003452291013672948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010142828337848186, "entropy": 0.6920239329338074, "epoch": 7.712328767123288, "grad_norm": 1.3945688917066021, "kl": 0.6404417753219604, "learning_rate": 3.073630136986301e-07, "loss": -0.0667, "step": 1126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1968.0, "completions/max_terminated_length": 1968.0, "completions/mean_length": 166.1428680419922, "completions/mean_terminated_length": 166.1428680419922, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.3325320780277252, "epoch": 7.719178082191781, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 1.8104986060157497, "kl": 0.7321409583091736, "learning_rate": 3.0719178082191777e-07, "loss": 0.0472, "num_tokens": 12399502.0, "reward": 0.9058110117912292, "reward_std": 0.02701496332883835, "rewards/check_gptzero_func/mean": 0.9058110117912292, "rewards/check_gptzero_func/std": 0.196963369846344, "sampling/importance_sampling_ratio/max": 1.4740182161331177, "sampling/importance_sampling_ratio/mean": 1.0002620220184326, "sampling/importance_sampling_ratio/min": 0.48375868797302246, "sampling/sampling_logp_difference/max": 0.7261691093444824, "sampling/sampling_logp_difference/mean": 0.009432060644030571, "step": 1127 }, { "clip_ratio/high_max": 0.015304068103432655, "clip_ratio/high_mean": 0.004540052264928818, "clip_ratio/low_mean": 0.0037643909454345703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008304443210363388, "entropy": 0.33300039172172546, "epoch": 7.726027397260274, "grad_norm": 1.5998937147145966, "kl": 0.7298762202262878, "learning_rate": 3.0702054794520547e-07, "loss": 0.0381, "step": 1128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1529.0, "completions/mean_length": 277.5, "completions/mean_terminated_length": 228.0, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 0.5509417653083801, "epoch": 7.732876712328768, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3985912246231742, "kl": 0.7460474967956543, "learning_rate": 3.068493150684932e-07, "loss": 0.097, "num_tokens": 12419450.0, "reward": 0.8097063302993774, "reward_std": 0.07614926248788834, "rewards/check_gptzero_func/mean": 0.8097062706947327, "rewards/check_gptzero_func/std": 0.3091267943382263, "sampling/importance_sampling_ratio/max": 1.548159122467041, "sampling/importance_sampling_ratio/mean": 0.9995267987251282, "sampling/importance_sampling_ratio/min": 0.6536650657653809, "sampling/sampling_logp_difference/max": 0.43706655502319336, "sampling/sampling_logp_difference/mean": 0.015994224697351456, "step": 1129 }, { "clip_ratio/high_max": 0.017286084592342377, "clip_ratio/high_mean": 0.009355561807751656, "clip_ratio/low_mean": 0.0038367912638932467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.013192353770136833, "entropy": 0.5520597100257874, "epoch": 7.739726027397261, "grad_norm": 1.2349649935664044, "kl": 0.7444693446159363, "learning_rate": 3.0667808219178083e-07, "loss": 0.0895, "step": 1130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1211.0, "completions/mean_length": 195.6428680419922, "completions/mean_terminated_length": 91.77777862548828, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.2496231496334076, "epoch": 7.7465753424657535, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.3527658168198107, "kl": 0.7579542398452759, "learning_rate": 3.065068493150685e-07, "loss": 0.0843, "num_tokens": 12435476.0, "reward": 0.9187570810317993, "reward_std": 0.05282645672559738, "rewards/check_gptzero_func/mean": 0.9187570214271545, "rewards/check_gptzero_func/std": 0.18028856813907623, "sampling/importance_sampling_ratio/max": 1.4022599458694458, "sampling/importance_sampling_ratio/mean": 0.9995346069335938, "sampling/importance_sampling_ratio/min": 0.6178499460220337, "sampling/sampling_logp_difference/max": 0.4815096855163574, "sampling/sampling_logp_difference/mean": 0.006442150566726923, "step": 1131 }, { "clip_ratio/high_max": 0.028256069868803024, "clip_ratio/high_mean": 0.004639354068785906, "clip_ratio/low_mean": 0.002920714905485511, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00756006920710206, "entropy": 0.2494928389787674, "epoch": 7.7534246575342465, "grad_norm": 1.1835876829384429, "kl": 0.7586169242858887, "learning_rate": 3.0633561643835613e-07, "loss": 0.0774, "step": 1132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1541.0, "completions/max_terminated_length": 1541.0, "completions/mean_length": 263.5714416503906, "completions/mean_terminated_length": 263.5714416503906, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 0.6036137938499451, "epoch": 7.760273972602739, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.4794575032985098, "kl": 0.7709540724754333, "learning_rate": 3.0616438356164383e-07, "loss": 0.0407, "num_tokens": 12454972.0, "reward": 0.8431532979011536, "reward_std": 0.12156232446432114, "rewards/check_gptzero_func/mean": 0.8431532979011536, "rewards/check_gptzero_func/std": 0.28238344192504883, "sampling/importance_sampling_ratio/max": 1.4096808433532715, "sampling/importance_sampling_ratio/mean": 0.9993340373039246, "sampling/importance_sampling_ratio/min": 0.5826366543769836, "sampling/sampling_logp_difference/max": 0.5401915311813354, "sampling/sampling_logp_difference/mean": 0.016822589561343193, "step": 1133 }, { "clip_ratio/high_max": 0.009218054823577404, "clip_ratio/high_mean": 0.005050335079431534, "clip_ratio/low_mean": 0.0038037982303649187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008854134008288383, "entropy": 0.6045476794242859, "epoch": 7.767123287671232, "grad_norm": 1.3841747735901435, "kl": 0.7667460441589355, "learning_rate": 3.059931506849315e-07, "loss": 0.0324, "step": 1134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1105.0, "completions/max_terminated_length": 1105.0, "completions/mean_length": 254.98214721679688, "completions/mean_terminated_length": 254.98214721679688, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.38770171999931335, "epoch": 7.773972602739726, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5074097659284782, "kl": 0.7917433977127075, "learning_rate": 3.0582191780821913e-07, "loss": 0.0457, "num_tokens": 12475397.0, "reward": 0.8734393119812012, "reward_std": 0.03735087811946869, "rewards/check_gptzero_func/mean": 0.8734392523765564, "rewards/check_gptzero_func/std": 0.26169711351394653, "sampling/importance_sampling_ratio/max": 1.4270943403244019, "sampling/importance_sampling_ratio/mean": 0.99974125623703, "sampling/importance_sampling_ratio/min": 0.6802839636802673, "sampling/sampling_logp_difference/max": 0.3852449655532837, "sampling/sampling_logp_difference/mean": 0.012076695449650288, "step": 1135 }, { "clip_ratio/high_max": 0.011527377180755138, "clip_ratio/high_mean": 0.0053830621764063835, "clip_ratio/low_mean": 0.004878460429608822, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01026152167469263, "entropy": 0.3876477777957916, "epoch": 7.780821917808219, "grad_norm": 1.3153390837242744, "kl": 0.7866886854171753, "learning_rate": 3.056506849315068e-07, "loss": 0.038, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 327.0, "completions/mean_length": 190.60714721679688, "completions/mean_terminated_length": 86.55555725097656, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.17345701158046722, "epoch": 7.787671232876712, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 0.5062792222965455, "kl": 0.7672613859176636, "learning_rate": 3.0547945205479454e-07, "loss": 0.3438, "num_tokens": 12491369.0, "reward": 0.9541551470756531, "reward_std": 0.004803188145160675, "rewards/check_gptzero_func/mean": 0.9541550874710083, "rewards/check_gptzero_func/std": 0.12679365277290344, "sampling/importance_sampling_ratio/max": 1.3697670698165894, "sampling/importance_sampling_ratio/mean": 0.9997021555900574, "sampling/importance_sampling_ratio/min": 0.7527708411216736, "sampling/sampling_logp_difference/max": 0.31464076042175293, "sampling/sampling_logp_difference/mean": 0.00459644477814436, "step": 1137 }, { "clip_ratio/high_max": 0.03096330352127552, "clip_ratio/high_mean": 0.004645172040909529, "clip_ratio/low_mean": 0.0038456558249890804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00849082786589861, "entropy": 0.172690287232399, "epoch": 7.794520547945205, "grad_norm": 0.41484094221222145, "kl": 0.7690775990486145, "learning_rate": 3.053082191780822e-07, "loss": 0.3417, "step": 1138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 308.0, "completions/mean_length": 185.5178680419922, "completions/mean_terminated_length": 81.27777862548828, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.15801317989826202, "epoch": 7.801369863013699, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 0.6041320579314362, "kl": 0.7064151763916016, "learning_rate": 3.0513698630136984e-07, "loss": 0.4373, "num_tokens": 12506696.0, "reward": 0.9186637997627258, "reward_std": 0.042553845793008804, "rewards/check_gptzero_func/mean": 0.9186637997627258, "rewards/check_gptzero_func/std": 0.2234858274459839, "sampling/importance_sampling_ratio/max": 1.201270341873169, "sampling/importance_sampling_ratio/mean": 1.0001949071884155, "sampling/importance_sampling_ratio/min": 0.7865523099899292, "sampling/sampling_logp_difference/max": 0.2400960922241211, "sampling/sampling_logp_difference/mean": 0.004205556120723486, "step": 1139 }, { "clip_ratio/high_max": 0.010339734144508839, "clip_ratio/high_mean": 0.002259272849187255, "clip_ratio/low_mean": 0.008492453023791313, "clip_ratio/low_min": 0.0005932957865297794, "clip_ratio/region_mean": 0.010751725174486637, "entropy": 0.15765412151813507, "epoch": 7.808219178082192, "grad_norm": 0.43378804137278953, "kl": 0.7093515396118164, "learning_rate": 3.0496575342465754e-07, "loss": 0.4349, "step": 1140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 231.58929443359375, "completions/mean_terminated_length": 181.25454711914062, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.3642177879810333, "epoch": 7.815068493150685, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4214719417378388, "kl": 0.6630176901817322, "learning_rate": 3.047945205479452e-07, "loss": 0.1045, "num_tokens": 12525549.0, "reward": 0.8386561274528503, "reward_std": 0.032192397862672806, "rewards/check_gptzero_func/mean": 0.8386560678482056, "rewards/check_gptzero_func/std": 0.2756265103816986, "sampling/importance_sampling_ratio/max": 1.4256657361984253, "sampling/importance_sampling_ratio/mean": 0.9998956918716431, "sampling/importance_sampling_ratio/min": 0.6873502731323242, "sampling/sampling_logp_difference/max": 0.3749113082885742, "sampling/sampling_logp_difference/mean": 0.009964216500520706, "step": 1141 }, { "clip_ratio/high_max": 0.012869564816355705, "clip_ratio/high_mean": 0.005265682470053434, "clip_ratio/low_mean": 0.0030066368635743856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008272320032119751, "entropy": 0.364512175321579, "epoch": 7.821917808219178, "grad_norm": 1.0406250501335177, "kl": 0.666362464427948, "learning_rate": 3.0462328767123284e-07, "loss": 0.0986, "step": 1142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1737.0, "completions/max_terminated_length": 1737.0, "completions/mean_length": 290.51788330078125, "completions/mean_terminated_length": 290.51788330078125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.7809233069419861, "epoch": 7.828767123287671, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.535409465376778, "kl": 0.5841965079307556, "learning_rate": 3.044520547945205e-07, "loss": 0.0005, "num_tokens": 12547190.0, "reward": 0.8373463153839111, "reward_std": 0.06630125641822815, "rewards/check_gptzero_func/mean": 0.8373462557792664, "rewards/check_gptzero_func/std": 0.28816691040992737, "sampling/importance_sampling_ratio/max": 1.4514727592468262, "sampling/importance_sampling_ratio/mean": 0.9998099207878113, "sampling/importance_sampling_ratio/min": 0.6517046093940735, "sampling/sampling_logp_difference/max": 0.42816388607025146, "sampling/sampling_logp_difference/mean": 0.020034343004226685, "step": 1143 }, { "clip_ratio/high_max": 0.01640000008046627, "clip_ratio/high_mean": 0.0072627379558980465, "clip_ratio/low_mean": 0.004910993855446577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012173732742667198, "entropy": 0.7791885733604431, "epoch": 7.835616438356165, "grad_norm": 1.3837439799541165, "kl": 0.5833797454833984, "learning_rate": 3.042808219178082e-07, "loss": -0.0079, "step": 1144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 375.64288330078125, "completions/mean_terminated_length": 227.09434509277344, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.6608626246452332, "epoch": 7.842465753424658, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.1031174498834062, "kl": 0.6408026814460754, "learning_rate": 3.041095890410959e-07, "loss": 0.2924, "num_tokens": 12572698.0, "reward": 0.818448007106781, "reward_std": 0.06631492078304291, "rewards/check_gptzero_func/mean": 0.818448007106781, "rewards/check_gptzero_func/std": 0.3213970959186554, "sampling/importance_sampling_ratio/max": 1.4249401092529297, "sampling/importance_sampling_ratio/mean": 0.9997954368591309, "sampling/importance_sampling_ratio/min": 0.6149374842643738, "sampling/sampling_logp_difference/max": 0.4862346649169922, "sampling/sampling_logp_difference/mean": 0.014333906583487988, "step": 1145 }, { "clip_ratio/high_max": 0.008409506641328335, "clip_ratio/high_mean": 0.003925635479390621, "clip_ratio/low_mean": 0.0030279119964689016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006953547243028879, "entropy": 0.6604447364807129, "epoch": 7.8493150684931505, "grad_norm": 1.1111790299667044, "kl": 0.6405066251754761, "learning_rate": 3.0393835616438355e-07, "loss": 0.2865, "step": 1146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 881.0, "completions/mean_length": 171.58929443359375, "completions/mean_terminated_length": 120.16363525390625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.43380147218704224, "epoch": 7.8561643835616435, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.448089956164157, "kl": 0.8280947804450989, "learning_rate": 3.0376712328767125e-07, "loss": 0.2508, "num_tokens": 12587275.0, "reward": 0.8869720101356506, "reward_std": 0.08232080936431885, "rewards/check_gptzero_func/mean": 0.8869719505310059, "rewards/check_gptzero_func/std": 0.2571820020675659, "sampling/importance_sampling_ratio/max": 1.46464204788208, "sampling/importance_sampling_ratio/mean": 1.0001580715179443, "sampling/importance_sampling_ratio/min": 0.7554793357849121, "sampling/sampling_logp_difference/max": 0.3816108703613281, "sampling/sampling_logp_difference/mean": 0.009985667653381824, "step": 1147 }, { "clip_ratio/high_max": 0.02222222276031971, "clip_ratio/high_mean": 0.0035267856437712908, "clip_ratio/low_mean": 0.00385810318402946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007384888827800751, "entropy": 0.4338769316673279, "epoch": 7.863013698630137, "grad_norm": 1.2662410095584047, "kl": 0.8292988538742065, "learning_rate": 3.035958904109589e-07, "loss": 0.244, "step": 1148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 110.05357360839844, "completions/mean_terminated_length": 110.05357360839844, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.15884025394916534, "epoch": 7.86986301369863, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.7105690046468127, "kl": 0.8855603933334351, "learning_rate": 3.0342465753424656e-07, "loss": -0.0198, "num_tokens": 12598308.0, "reward": 0.898908257484436, "reward_std": 0.061126165091991425, "rewards/check_gptzero_func/mean": 0.8989081978797913, "rewards/check_gptzero_func/std": 0.2648211121559143, "sampling/importance_sampling_ratio/max": 1.3200336694717407, "sampling/importance_sampling_ratio/mean": 0.9997227787971497, "sampling/importance_sampling_ratio/min": 0.557388186454773, "sampling/sampling_logp_difference/max": 0.5844933986663818, "sampling/sampling_logp_difference/mean": 0.004284663125872612, "step": 1149 }, { "clip_ratio/high_max": 0.025834230706095695, "clip_ratio/high_mean": 0.004888989496976137, "clip_ratio/low_mean": 0.0030316710472106934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007920661009848118, "entropy": 0.15817832946777344, "epoch": 7.876712328767123, "grad_norm": 1.2820229545687172, "kl": 0.8855250477790833, "learning_rate": 3.032534246575342e-07, "loss": -0.0273, "step": 1150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1551.0, "completions/max_terminated_length": 1551.0, "completions/mean_length": 223.9107208251953, "completions/mean_terminated_length": 223.9107208251953, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.5321089625358582, "epoch": 7.883561643835616, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.7031660859404618, "kl": 0.8276859521865845, "learning_rate": 3.030821917808219e-07, "loss": 0.0043, "num_tokens": 12615317.0, "reward": 0.8581253886222839, "reward_std": 0.048073358833789825, "rewards/check_gptzero_func/mean": 0.8581252694129944, "rewards/check_gptzero_func/std": 0.25715458393096924, "sampling/importance_sampling_ratio/max": 1.4017506837844849, "sampling/importance_sampling_ratio/mean": 1.0007351636886597, "sampling/importance_sampling_ratio/min": 0.6536219120025635, "sampling/sampling_logp_difference/max": 0.42522621154785156, "sampling/sampling_logp_difference/mean": 0.015188895165920258, "step": 1151 }, { "clip_ratio/high_max": 0.01312175765633583, "clip_ratio/high_mean": 0.00663952948525548, "clip_ratio/low_mean": 0.003420740133151412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010060268454253674, "entropy": 0.5347602963447571, "epoch": 7.890410958904109, "grad_norm": 1.471660714783174, "kl": 0.7909761071205139, "learning_rate": 3.0291095890410956e-07, "loss": -0.0045, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 153.0178680419922, "completions/mean_terminated_length": 153.0178680419922, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.27213215827941895, "epoch": 7.897260273972603, "frac_reward_zero_std": 0.2857142984867096, "grad_norm": 1.9363534702533123, "kl": 1.0379420518875122, "learning_rate": 3.027397260273972e-07, "loss": 0.043, "num_tokens": 12628604.0, "reward": 0.8919819593429565, "reward_std": 0.034790780395269394, "rewards/check_gptzero_func/mean": 0.8919818997383118, "rewards/check_gptzero_func/std": 0.28028836846351624, "sampling/importance_sampling_ratio/max": 1.8537036180496216, "sampling/importance_sampling_ratio/mean": 0.9996412992477417, "sampling/importance_sampling_ratio/min": 0.6622567772865295, "sampling/sampling_logp_difference/max": 0.6171855926513672, "sampling/sampling_logp_difference/mean": 0.008460323326289654, "step": 1153 }, { "clip_ratio/high_max": 0.020206023007631302, "clip_ratio/high_mean": 0.005187701899558306, "clip_ratio/low_mean": 0.005166254937648773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010353957302868366, "entropy": 0.2729319930076599, "epoch": 7.904109589041096, "grad_norm": 1.597349899252096, "kl": 1.025299072265625, "learning_rate": 3.0256849315068497e-07, "loss": 0.0333, "step": 1154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 256.4464416503906, "completions/mean_terminated_length": 256.4464416503906, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.48432213068008423, "epoch": 7.910958904109589, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.6018306707186567, "kl": 0.8192325234413147, "learning_rate": 3.023972602739726e-07, "loss": 0.0505, "num_tokens": 12647429.0, "reward": 0.8388221859931946, "reward_std": 0.051374055445194244, "rewards/check_gptzero_func/mean": 0.8388221859931946, "rewards/check_gptzero_func/std": 0.2984768748283386, "sampling/importance_sampling_ratio/max": 1.5538508892059326, "sampling/importance_sampling_ratio/mean": 1.0003010034561157, "sampling/importance_sampling_ratio/min": 0.5937784314155579, "sampling/sampling_logp_difference/max": 0.5212490558624268, "sampling/sampling_logp_difference/mean": 0.01418600045144558, "step": 1155 }, { "clip_ratio/high_max": 0.008547008968889713, "clip_ratio/high_mean": 0.004534517414867878, "clip_ratio/low_mean": 0.0026749137323349714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007209430914372206, "entropy": 0.4859085977077484, "epoch": 7.917808219178082, "grad_norm": 1.4847154463531747, "kl": 0.8150781989097595, "learning_rate": 3.0222602739726027e-07, "loss": 0.0422, "step": 1156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2528.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 298.14288330078125, "completions/mean_terminated_length": 298.14288330078125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5753942131996155, "epoch": 7.924657534246576, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.493692712412627, "kl": 1.0746949911117554, "learning_rate": 3.020547945205479e-07, "loss": -0.0253, "num_tokens": 12668839.0, "reward": 0.8410034775733948, "reward_std": 0.08202051371335983, "rewards/check_gptzero_func/mean": 0.84100341796875, "rewards/check_gptzero_func/std": 0.3123722970485687, "sampling/importance_sampling_ratio/max": 1.4832619428634644, "sampling/importance_sampling_ratio/mean": 0.9997640252113342, "sampling/importance_sampling_ratio/min": 0.6701422929763794, "sampling/sampling_logp_difference/max": 0.4002652168273926, "sampling/sampling_logp_difference/mean": 0.015647809952497482, "step": 1157 }, { "clip_ratio/high_max": 0.009428032673895359, "clip_ratio/high_mean": 0.005297049880027771, "clip_ratio/low_mean": 0.0028700679540634155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008167117834091187, "entropy": 0.5773094892501831, "epoch": 7.931506849315069, "grad_norm": 1.3719263212146864, "kl": 1.0249016284942627, "learning_rate": 3.018835616438356e-07, "loss": -0.034, "step": 1158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 152.1607208251953, "completions/mean_terminated_length": 152.1607208251953, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.29582685232162476, "epoch": 7.938356164383562, "frac_reward_zero_std": 0.0, "grad_norm": 1.8122945076325419, "kl": 0.8352473974227905, "learning_rate": 3.0171232876712327e-07, "loss": 0.0012, "num_tokens": 12682784.0, "reward": 0.9339900016784668, "reward_std": 0.011662838980555534, "rewards/check_gptzero_func/mean": 0.933989942073822, "rewards/check_gptzero_func/std": 0.18799695372581482, "sampling/importance_sampling_ratio/max": 1.3809195756912231, "sampling/importance_sampling_ratio/mean": 0.9998083114624023, "sampling/importance_sampling_ratio/min": 0.7027292251586914, "sampling/sampling_logp_difference/max": 0.3527836799621582, "sampling/sampling_logp_difference/mean": 0.008619992062449455, "step": 1159 }, { "clip_ratio/high_max": 0.016933638602495193, "clip_ratio/high_mean": 0.004936705809086561, "clip_ratio/low_mean": 0.0037922640331089497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008728970773518085, "entropy": 0.29612138867378235, "epoch": 7.945205479452055, "grad_norm": 1.5720911284487649, "kl": 0.8245553970336914, "learning_rate": 3.015410958904109e-07, "loss": -0.0081, "step": 1160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 130.35714721679688, "completions/mean_terminated_length": 130.35714721679688, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.30271419882774353, "epoch": 7.9520547945205475, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.9018757245425286, "kl": 1.046457052230835, "learning_rate": 3.013698630136986e-07, "loss": -0.0468, "num_tokens": 12695080.0, "reward": 0.9525036811828613, "reward_std": 0.015051580965518951, "rewards/check_gptzero_func/mean": 0.9525036215782166, "rewards/check_gptzero_func/std": 0.16291260719299316, "sampling/importance_sampling_ratio/max": 1.315550684928894, "sampling/importance_sampling_ratio/mean": 0.9997978806495667, "sampling/importance_sampling_ratio/min": 0.672551691532135, "sampling/sampling_logp_difference/max": 0.39667630195617676, "sampling/sampling_logp_difference/mean": 0.007415437139570713, "step": 1161 }, { "clip_ratio/high_max": 0.025236593559384346, "clip_ratio/high_mean": 0.004161091987043619, "clip_ratio/low_mean": 0.002464222488924861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0066253142431378365, "entropy": 0.30304691195487976, "epoch": 7.958904109589041, "grad_norm": 1.5510137693403767, "kl": 1.0484193563461304, "learning_rate": 3.011986301369863e-07, "loss": -0.0556, "step": 1162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2227.0, "completions/max_terminated_length": 2227.0, "completions/mean_length": 280.8571472167969, "completions/mean_terminated_length": 280.8571472167969, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.6434864401817322, "epoch": 7.965753424657534, "frac_reward_zero_std": 0.0, "grad_norm": 1.4340942745256458, "kl": 0.8368672132492065, "learning_rate": 3.01027397260274e-07, "loss": 0.0952, "num_tokens": 12715674.0, "reward": 0.8259575963020325, "reward_std": 0.07223312556743622, "rewards/check_gptzero_func/mean": 0.8259575963020325, "rewards/check_gptzero_func/std": 0.28930336236953735, "sampling/importance_sampling_ratio/max": 1.436427116394043, "sampling/importance_sampling_ratio/mean": 1.0007765293121338, "sampling/importance_sampling_ratio/min": 0.6871601939201355, "sampling/sampling_logp_difference/max": 0.37518787384033203, "sampling/sampling_logp_difference/mean": 0.016322143375873566, "step": 1163 }, { "clip_ratio/high_max": 0.009265387430787086, "clip_ratio/high_mean": 0.004526267293840647, "clip_ratio/low_mean": 0.003572898218408227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008099165745079517, "entropy": 0.6448726654052734, "epoch": 7.972602739726027, "grad_norm": 1.3143790616523767, "kl": 0.8378929495811462, "learning_rate": 3.0085616438356163e-07, "loss": 0.0877, "step": 1164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 240.71429443359375, "completions/mean_terminated_length": 240.71429443359375, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.3658020794391632, "epoch": 7.97945205479452, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.6468960890277304, "kl": 0.8610500693321228, "learning_rate": 3.0068493150684933e-07, "loss": -0.013, "num_tokens": 12734476.0, "reward": 0.8786619305610657, "reward_std": 0.030109787359833717, "rewards/check_gptzero_func/mean": 0.8786619305610657, "rewards/check_gptzero_func/std": 0.22587449848651886, "sampling/importance_sampling_ratio/max": 1.3350937366485596, "sampling/importance_sampling_ratio/mean": 0.9996596574783325, "sampling/importance_sampling_ratio/min": 0.614445686340332, "sampling/sampling_logp_difference/max": 0.48703479766845703, "sampling/sampling_logp_difference/mean": 0.010783758945763111, "step": 1165 }, { "clip_ratio/high_max": 0.010358565486967564, "clip_ratio/high_mean": 0.004176501650363207, "clip_ratio/low_mean": 0.0027084501925855875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006884952075779438, "entropy": 0.36672529578208923, "epoch": 7.986301369863014, "grad_norm": 1.4074700180200186, "kl": 0.81610107421875, "learning_rate": 3.00513698630137e-07, "loss": -0.0214, "step": 1166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 982.0, "completions/mean_length": 236.75001525878906, "completions/mean_terminated_length": 134.40740966796875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.43282610177993774, "epoch": 7.993150684931507, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.462014124082645, "kl": 0.6790090203285217, "learning_rate": 3.0034246575342463e-07, "loss": 0.054, "num_tokens": 12752472.0, "reward": 0.8512150049209595, "reward_std": 0.06668473035097122, "rewards/check_gptzero_func/mean": 0.8512150049209595, "rewards/check_gptzero_func/std": 0.2920803427696228, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996919631958008, "sampling/importance_sampling_ratio/min": 0.47405120730400085, "sampling/sampling_logp_difference/max": 0.7725130319595337, "sampling/sampling_logp_difference/mean": 0.013121488504111767, "step": 1167 }, { "clip_ratio/high_max": 0.018497757613658905, "clip_ratio/high_mean": 0.006770423613488674, "clip_ratio/low_mean": 0.006360155064612627, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.01313057728111744, "entropy": 0.433682382106781, "epoch": 8.0, "grad_norm": 1.2046483430900445, "kl": 0.6771628260612488, "learning_rate": 3.0017123287671234e-07, "loss": 0.0474, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 230.48214721679688, "completions/mean_terminated_length": 230.48214721679688, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.4758428931236267, "epoch": 8.006849315068493, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.5734532987155179, "kl": 0.7541128993034363, "learning_rate": 3e-07, "loss": 0.0043, "num_tokens": 12770213.0, "reward": 0.8471726775169373, "reward_std": 0.04913893714547157, "rewards/check_gptzero_func/mean": 0.847172737121582, "rewards/check_gptzero_func/std": 0.2951652705669403, "sampling/importance_sampling_ratio/max": 1.5971448421478271, "sampling/importance_sampling_ratio/mean": 1.0002186298370361, "sampling/importance_sampling_ratio/min": 0.6584904193878174, "sampling/sampling_logp_difference/max": 0.4682176113128662, "sampling/sampling_logp_difference/mean": 0.013129300437867641, "step": 1169 }, { "clip_ratio/high_max": 0.013320647180080414, "clip_ratio/high_mean": 0.004662927705794573, "clip_ratio/low_mean": 0.003184609580785036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007847537286579609, "entropy": 0.47518500685691833, "epoch": 8.013698630136986, "grad_norm": 1.4431300589320286, "kl": 0.7533807158470154, "learning_rate": 2.9982876712328764e-07, "loss": -0.0036, "step": 1170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 383.1607360839844, "completions/mean_terminated_length": 235.03773498535156, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.5671252012252808, "epoch": 8.020547945205479, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.1232098758849924, "kl": 0.6024059057235718, "learning_rate": 2.996575342465753e-07, "loss": 0.0781, "num_tokens": 12796494.0, "reward": 0.7682260870933533, "reward_std": 0.02649890072643757, "rewards/check_gptzero_func/mean": 0.7682260870933533, "rewards/check_gptzero_func/std": 0.35903945565223694, "sampling/importance_sampling_ratio/max": 1.3317593336105347, "sampling/importance_sampling_ratio/mean": 1.0004839897155762, "sampling/importance_sampling_ratio/min": 0.6380586624145508, "sampling/sampling_logp_difference/max": 0.4493250846862793, "sampling/sampling_logp_difference/mean": 0.014540675096213818, "step": 1171 }, { "clip_ratio/high_max": 0.009086561389267445, "clip_ratio/high_mean": 0.003929409198462963, "clip_ratio/low_mean": 0.0027119985315948725, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006641407962888479, "entropy": 0.5687025785446167, "epoch": 8.027397260273972, "grad_norm": 2.4145706710466284, "kl": 0.5997697710990906, "learning_rate": 2.9948630136986304e-07, "loss": 0.0719, "step": 1172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 183.82144165039062, "completions/mean_terminated_length": 183.82144165039062, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.4403926432132721, "epoch": 8.034246575342467, "frac_reward_zero_std": 0.25, "grad_norm": 1.826631478460778, "kl": 0.8933321237564087, "learning_rate": 2.993150684931507e-07, "loss": 0.0261, "num_tokens": 12811006.0, "reward": 0.890669584274292, "reward_std": 0.045418426394462585, "rewards/check_gptzero_func/mean": 0.8906695246696472, "rewards/check_gptzero_func/std": 0.22913816571235657, "sampling/importance_sampling_ratio/max": 1.4124516248703003, "sampling/importance_sampling_ratio/mean": 0.9997294545173645, "sampling/importance_sampling_ratio/min": 0.663981020450592, "sampling/sampling_logp_difference/max": 0.40950167179107666, "sampling/sampling_logp_difference/mean": 0.011468993499875069, "step": 1173 }, { "clip_ratio/high_max": 0.014325069263577461, "clip_ratio/high_mean": 0.004843329545110464, "clip_ratio/low_mean": 0.0022896325681358576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007132962346076965, "entropy": 0.4402766823768616, "epoch": 8.04109589041096, "grad_norm": 1.7196021344222858, "kl": 0.8908277153968811, "learning_rate": 2.9914383561643834e-07, "loss": 0.0167, "step": 1174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1769.0, "completions/mean_length": 258.2857360839844, "completions/mean_terminated_length": 208.4363555908203, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.3806353211402893, "epoch": 8.047945205479452, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.3280291688035584, "kl": 0.8811154961585999, "learning_rate": 2.98972602739726e-07, "loss": 0.2491, "num_tokens": 12830506.0, "reward": 0.9058175086975098, "reward_std": 0.03502273187041283, "rewards/check_gptzero_func/mean": 0.905817449092865, "rewards/check_gptzero_func/std": 0.22863489389419556, "sampling/importance_sampling_ratio/max": 1.2982909679412842, "sampling/importance_sampling_ratio/mean": 1.0001362562179565, "sampling/importance_sampling_ratio/min": 0.7161467671394348, "sampling/sampling_logp_difference/max": 0.33387017250061035, "sampling/sampling_logp_difference/mean": 0.009979140013456345, "step": 1175 }, { "clip_ratio/high_max": 0.013134460896253586, "clip_ratio/high_mean": 0.0042542386800050735, "clip_ratio/low_mean": 0.002998414682224393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00725265359506011, "entropy": 0.3810538351535797, "epoch": 8.054794520547945, "grad_norm": 1.1828211493153, "kl": 0.8800176978111267, "learning_rate": 2.988013698630137e-07, "loss": 0.2418, "step": 1176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 158.30357360839844, "completions/mean_terminated_length": 158.30357360839844, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.33678290247917175, "epoch": 8.061643835616438, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.8083035369864116, "kl": 0.93620765209198, "learning_rate": 2.9863013698630135e-07, "loss": 0.0228, "num_tokens": 12844269.0, "reward": 0.9063292741775513, "reward_std": 0.026510098949074745, "rewards/check_gptzero_func/mean": 0.9063292145729065, "rewards/check_gptzero_func/std": 0.2389296293258667, "sampling/importance_sampling_ratio/max": 1.3967463970184326, "sampling/importance_sampling_ratio/mean": 0.9998412728309631, "sampling/importance_sampling_ratio/min": 0.7226291298866272, "sampling/sampling_logp_difference/max": 0.33414554595947266, "sampling/sampling_logp_difference/mean": 0.009428843855857849, "step": 1177 }, { "clip_ratio/high_max": 0.018727915361523628, "clip_ratio/high_mean": 0.006221506278961897, "clip_ratio/low_mean": 0.004648204892873764, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.010869711637496948, "entropy": 0.3378930389881134, "epoch": 8.068493150684931, "grad_norm": 1.5526897421824033, "kl": 0.9366354942321777, "learning_rate": 2.98458904109589e-07, "loss": 0.0131, "step": 1178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1231.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 181.25001525878906, "completions/mean_terminated_length": 181.25001525878906, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.33158132433891296, "epoch": 8.075342465753424, "frac_reward_zero_std": 0.25, "grad_norm": 1.7247646554845166, "kl": 0.8153306245803833, "learning_rate": 2.982876712328767e-07, "loss": 0.0219, "num_tokens": 12859773.0, "reward": 0.8952047228813171, "reward_std": 0.03730219602584839, "rewards/check_gptzero_func/mean": 0.8952046632766724, "rewards/check_gptzero_func/std": 0.2243228256702423, "sampling/importance_sampling_ratio/max": 1.4400657415390015, "sampling/importance_sampling_ratio/mean": 0.9996376633644104, "sampling/importance_sampling_ratio/min": 0.6209232211112976, "sampling/sampling_logp_difference/max": 0.4765479564666748, "sampling/sampling_logp_difference/mean": 0.008861358277499676, "step": 1179 }, { "clip_ratio/high_max": 0.013261296786367893, "clip_ratio/high_mean": 0.004418233875185251, "clip_ratio/low_mean": 0.003559085773304105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00797731988132, "entropy": 0.3311351239681244, "epoch": 8.082191780821917, "grad_norm": 1.4889823306866574, "kl": 0.8149115443229675, "learning_rate": 2.9811643835616435e-07, "loss": 0.013, "step": 1180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 177.9107208251953, "completions/mean_terminated_length": 177.9107208251953, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "entropy": 0.40284743905067444, "epoch": 8.08904109589041, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.905972650182208, "kl": 1.0613181591033936, "learning_rate": 2.9794520547945206e-07, "loss": -0.081, "num_tokens": 12874558.0, "reward": 0.8517822623252869, "reward_std": 0.03168122470378876, "rewards/check_gptzero_func/mean": 0.8517822623252869, "rewards/check_gptzero_func/std": 0.2534492313861847, "sampling/importance_sampling_ratio/max": 1.3799598217010498, "sampling/importance_sampling_ratio/mean": 0.9997683763504028, "sampling/importance_sampling_ratio/min": 0.6505492329597473, "sampling/sampling_logp_difference/max": 0.42993831634521484, "sampling/sampling_logp_difference/mean": 0.010815005749464035, "step": 1181 }, { "clip_ratio/high_max": 0.015661446377635002, "clip_ratio/high_mean": 0.004254903178662062, "clip_ratio/low_mean": 0.002777421846985817, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007032325025647879, "entropy": 0.40469616651535034, "epoch": 8.095890410958905, "grad_norm": 1.5941417158288667, "kl": 1.017314076423645, "learning_rate": 2.977739726027397e-07, "loss": -0.0906, "step": 1182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 154.0, "completions/mean_terminated_length": 154.0, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.318376362323761, "epoch": 8.102739726027398, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.8653619648981243, "kl": 0.8540940880775452, "learning_rate": 2.976027397260274e-07, "loss": 0.0155, "num_tokens": 12888232.0, "reward": 0.8638809323310852, "reward_std": 0.09872816503047943, "rewards/check_gptzero_func/mean": 0.8638809323310852, "rewards/check_gptzero_func/std": 0.2640385925769806, "sampling/importance_sampling_ratio/max": 1.445313811302185, "sampling/importance_sampling_ratio/mean": 1.0001477003097534, "sampling/importance_sampling_ratio/min": 0.7637714147567749, "sampling/sampling_logp_difference/max": 0.36832642555236816, "sampling/sampling_logp_difference/mean": 0.008265876211225986, "step": 1183 }, { "clip_ratio/high_max": 0.01392532791942358, "clip_ratio/high_mean": 0.0023960901889950037, "clip_ratio/low_mean": 0.004230692517012358, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006626782473176718, "entropy": 0.31876781582832336, "epoch": 8.10958904109589, "grad_norm": 1.6804281818050968, "kl": 0.8570264577865601, "learning_rate": 2.9743150684931506e-07, "loss": 0.0059, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1038.0, "completions/max_terminated_length": 1038.0, "completions/mean_length": 112.91072082519531, "completions/mean_terminated_length": 112.91072082519531, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.3549945056438446, "epoch": 8.116438356164384, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.838377470535047, "kl": 0.9546455144882202, "learning_rate": 2.972602739726027e-07, "loss": 0.0588, "num_tokens": 12899617.0, "reward": 0.9430805444717407, "reward_std": 0.030643442645668983, "rewards/check_gptzero_func/mean": 0.943080484867096, "rewards/check_gptzero_func/std": 0.20102839171886444, "sampling/importance_sampling_ratio/max": 1.3228039741516113, "sampling/importance_sampling_ratio/mean": 0.999937891960144, "sampling/importance_sampling_ratio/min": 0.749815821647644, "sampling/sampling_logp_difference/max": 0.28792762756347656, "sampling/sampling_logp_difference/mean": 0.009234998375177383, "step": 1185 }, { "clip_ratio/high_max": 0.02568567730486393, "clip_ratio/high_mean": 0.005510426126420498, "clip_ratio/low_mean": 0.0061302571557462215, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.011640683747828007, "entropy": 0.35671013593673706, "epoch": 8.123287671232877, "grad_norm": 1.4876090547379759, "kl": 0.9597861170768738, "learning_rate": 2.970890410958904e-07, "loss": 0.05, "step": 1186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 187.05357360839844, "completions/mean_terminated_length": 187.05357360839844, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.466965913772583, "epoch": 8.13013698630137, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.795296871677471, "kl": 0.748691201210022, "learning_rate": 2.9691780821917806e-07, "loss": -0.0278, "num_tokens": 12915004.0, "reward": 0.9462603330612183, "reward_std": 0.020318083465099335, "rewards/check_gptzero_func/mean": 0.9462603330612183, "rewards/check_gptzero_func/std": 0.13429246842861176, "sampling/importance_sampling_ratio/max": 1.4083787202835083, "sampling/importance_sampling_ratio/mean": 1.000045657157898, "sampling/importance_sampling_ratio/min": 0.7076388597488403, "sampling/sampling_logp_difference/max": 0.3458213806152344, "sampling/sampling_logp_difference/mean": 0.01202565897256136, "step": 1187 }, { "clip_ratio/high_max": 0.01381304208189249, "clip_ratio/high_mean": 0.005267465952783823, "clip_ratio/low_mean": 0.003814717987552285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009082183241844177, "entropy": 0.46868059039115906, "epoch": 8.136986301369863, "grad_norm": 1.6151341396519023, "kl": 0.752593457698822, "learning_rate": 2.967465753424657e-07, "loss": -0.0368, "step": 1188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 216.62501525878906, "completions/mean_terminated_length": 166.01817321777344, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.5959094762802124, "epoch": 8.143835616438356, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.5889813411396572, "kl": 0.967599093914032, "learning_rate": 2.9657534246575336e-07, "loss": -0.1507, "num_tokens": 12932207.0, "reward": 0.9210851192474365, "reward_std": 0.044647376984357834, "rewards/check_gptzero_func/mean": 0.9210850596427917, "rewards/check_gptzero_func/std": 0.19712138175964355, "sampling/importance_sampling_ratio/max": 1.468673586845398, "sampling/importance_sampling_ratio/mean": 1.0002769231796265, "sampling/importance_sampling_ratio/min": 0.6959027647972107, "sampling/sampling_logp_difference/max": 0.38435959815979004, "sampling/sampling_logp_difference/mean": 0.012902325950562954, "step": 1189 }, { "clip_ratio/high_max": 0.012408759444952011, "clip_ratio/high_mean": 0.0036437443923205137, "clip_ratio/low_mean": 0.002618880709633231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006262625101953745, "entropy": 0.5983219146728516, "epoch": 8.150684931506849, "grad_norm": 1.4458436273007962, "kl": 0.9684619903564453, "learning_rate": 2.964041095890411e-07, "loss": -0.1584, "step": 1190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 991.0, "completions/max_terminated_length": 991.0, "completions/mean_length": 115.83929443359375, "completions/mean_terminated_length": 115.83929443359375, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.30433228611946106, "epoch": 8.157534246575343, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.8134907619898692, "kl": 1.1020535230636597, "learning_rate": 2.9623287671232877e-07, "loss": 0.013, "num_tokens": 12943548.0, "reward": 0.9253097772598267, "reward_std": 0.025393201038241386, "rewards/check_gptzero_func/mean": 0.9253097176551819, "rewards/check_gptzero_func/std": 0.20848999917507172, "sampling/importance_sampling_ratio/max": 1.4058274030685425, "sampling/importance_sampling_ratio/mean": 0.99940025806427, "sampling/importance_sampling_ratio/min": 0.7201342582702637, "sampling/sampling_logp_difference/max": 0.3406260013580322, "sampling/sampling_logp_difference/mean": 0.007911721244454384, "step": 1191 }, { "clip_ratio/high_max": 0.01964636519551277, "clip_ratio/high_mean": 0.004478552378714085, "clip_ratio/low_mean": 0.0023516863584518433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006830238737165928, "entropy": 0.3055533766746521, "epoch": 8.164383561643836, "grad_norm": 1.582710375975304, "kl": 1.0979458093643188, "learning_rate": 2.960616438356164e-07, "loss": 0.0049, "step": 1192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 120.14286041259766, "completions/mean_terminated_length": 120.14286041259766, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.2248939573764801, "epoch": 8.17123287671233, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7722019049401894, "kl": 1.0392563343048096, "learning_rate": 2.958904109589041e-07, "loss": -0.0037, "num_tokens": 12955266.0, "reward": 0.951785147190094, "reward_std": 0.02161010354757309, "rewards/check_gptzero_func/mean": 0.9517850875854492, "rewards/check_gptzero_func/std": 0.16339761018753052, "sampling/importance_sampling_ratio/max": 1.285720944404602, "sampling/importance_sampling_ratio/mean": 0.9997011423110962, "sampling/importance_sampling_ratio/min": 0.6018628478050232, "sampling/sampling_logp_difference/max": 0.507725715637207, "sampling/sampling_logp_difference/mean": 0.006689551752060652, "step": 1193 }, { "clip_ratio/high_max": 0.016028495505452156, "clip_ratio/high_mean": 0.0034297234378755093, "clip_ratio/low_mean": 0.00255726114846766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005986984819173813, "entropy": 0.22586260735988617, "epoch": 8.178082191780822, "grad_norm": 1.6208990218506703, "kl": 1.0399580001831055, "learning_rate": 2.957191780821918e-07, "loss": -0.0118, "step": 1194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 1707.0, "completions/mean_length": 383.9464416503906, "completions/mean_terminated_length": 235.86793518066406, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.32781434059143066, "epoch": 8.184931506849315, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.0125357223473208, "kl": 0.6718882322311401, "learning_rate": 2.955479452054794e-07, "loss": 0.0914, "num_tokens": 12981405.0, "reward": 0.8826362490653992, "reward_std": 0.042905282229185104, "rewards/check_gptzero_func/mean": 0.8826361894607544, "rewards/check_gptzero_func/std": 0.2523805797100067, "sampling/importance_sampling_ratio/max": 1.6470491886138916, "sampling/importance_sampling_ratio/mean": 1.000306248664856, "sampling/importance_sampling_ratio/min": 0.6425579190254211, "sampling/sampling_logp_difference/max": 0.49898529052734375, "sampling/sampling_logp_difference/mean": 0.008886542171239853, "step": 1195 }, { "clip_ratio/high_max": 0.01076923031359911, "clip_ratio/high_mean": 0.00309516373090446, "clip_ratio/low_mean": 0.002169098239392042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005264262668788433, "entropy": 0.32779785990715027, "epoch": 8.191780821917808, "grad_norm": 0.9073102470373543, "kl": 0.6677892804145813, "learning_rate": 2.953767123287671e-07, "loss": 0.086, "step": 1196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1507.0, "completions/max_terminated_length": 1507.0, "completions/mean_length": 199.12501525878906, "completions/mean_terminated_length": 199.12501525878906, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.6430138349533081, "epoch": 8.198630136986301, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.7085531139706118, "kl": 0.78369140625, "learning_rate": 2.952054794520548e-07, "loss": 0.0764, "num_tokens": 12997006.0, "reward": 0.8625314235687256, "reward_std": 0.049478307366371155, "rewards/check_gptzero_func/mean": 0.8625313639640808, "rewards/check_gptzero_func/std": 0.2762446999549866, "sampling/importance_sampling_ratio/max": 1.4139848947525024, "sampling/importance_sampling_ratio/mean": 1.0003576278686523, "sampling/importance_sampling_ratio/min": 0.7050718069076538, "sampling/sampling_logp_difference/max": 0.34945571422576904, "sampling/sampling_logp_difference/mean": 0.015038051642477512, "step": 1197 }, { "clip_ratio/high_max": 0.01409654039889574, "clip_ratio/high_mean": 0.0068641784600913525, "clip_ratio/low_mean": 0.005742951296269894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.012607129290699959, "entropy": 0.6429473161697388, "epoch": 8.205479452054794, "grad_norm": 1.5227266146893907, "kl": 0.7860205769538879, "learning_rate": 2.9503424657534243e-07, "loss": 0.0671, "step": 1198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 248.1607208251953, "completions/mean_terminated_length": 198.1272735595703, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.43971970677375793, "epoch": 8.212328767123287, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3180452176086352, "kl": 0.9898248314857483, "learning_rate": 2.9486301369863013e-07, "loss": -0.1341, "num_tokens": 13016127.0, "reward": 0.9286828637123108, "reward_std": 0.020116273313760757, "rewards/check_gptzero_func/mean": 0.9286828637123108, "rewards/check_gptzero_func/std": 0.1948767900466919, "sampling/importance_sampling_ratio/max": 1.3534544706344604, "sampling/importance_sampling_ratio/mean": 1.0003479719161987, "sampling/importance_sampling_ratio/min": 0.6879020929336548, "sampling/sampling_logp_difference/max": 0.37410879135131836, "sampling/sampling_logp_difference/mean": 0.011097622103989124, "step": 1199 }, { "clip_ratio/high_max": 0.009159034118056297, "clip_ratio/high_mean": 0.003466889262199402, "clip_ratio/low_mean": 0.003130188677459955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006597078405320644, "entropy": 0.43995121121406555, "epoch": 8.219178082191782, "grad_norm": 1.215933658565213, "kl": 0.9813112616539001, "learning_rate": 2.9469178082191784e-07, "loss": -0.1411, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2004.0, "completions/max_terminated_length": 2004.0, "completions/mean_length": 147.17857360839844, "completions/mean_terminated_length": 147.17857360839844, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.3580891489982605, "epoch": 8.226027397260275, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.8000204888632576, "kl": 1.0666568279266357, "learning_rate": 2.945205479452055e-07, "loss": 0.0016, "num_tokens": 13029309.0, "reward": 0.9530077576637268, "reward_std": 0.01234981045126915, "rewards/check_gptzero_func/mean": 0.953007698059082, "rewards/check_gptzero_func/std": 0.15606680512428284, "sampling/importance_sampling_ratio/max": 1.4587066173553467, "sampling/importance_sampling_ratio/mean": 1.0005651712417603, "sampling/importance_sampling_ratio/min": 0.725879967212677, "sampling/sampling_logp_difference/max": 0.3775501251220703, "sampling/sampling_logp_difference/mean": 0.008417868055403233, "step": 1201 }, { "clip_ratio/high_max": 0.014272344298660755, "clip_ratio/high_mean": 0.002974033122882247, "clip_ratio/low_mean": 0.0027422383427619934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005716271698474884, "entropy": 0.35901981592178345, "epoch": 8.232876712328768, "grad_norm": 5.70383235108614, "kl": 1.1176813840866089, "learning_rate": 2.9434931506849314e-07, "loss": -0.0049, "step": 1202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1672.0, "completions/max_terminated_length": 1672.0, "completions/mean_length": 217.1428680419922, "completions/mean_terminated_length": 217.1428680419922, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.4729613959789276, "epoch": 8.23972602739726, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.608698622531311, "kl": 0.8245236277580261, "learning_rate": 2.941780821917808e-07, "loss": 0.0432, "num_tokens": 13046865.0, "reward": 0.8768765330314636, "reward_std": 0.03325953334569931, "rewards/check_gptzero_func/mean": 0.8768764734268188, "rewards/check_gptzero_func/std": 0.2847636342048645, "sampling/importance_sampling_ratio/max": 1.832249641418457, "sampling/importance_sampling_ratio/mean": 0.9997772574424744, "sampling/importance_sampling_ratio/min": 0.7335669994354248, "sampling/sampling_logp_difference/max": 0.6055445671081543, "sampling/sampling_logp_difference/mean": 0.013150613754987717, "step": 1203 }, { "clip_ratio/high_max": 0.009689370170235634, "clip_ratio/high_mean": 0.004171102773398161, "clip_ratio/low_mean": 0.003837799886241555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00800890289247036, "entropy": 0.4735200107097626, "epoch": 8.246575342465754, "grad_norm": 1.45438386977192, "kl": 0.8249382376670837, "learning_rate": 2.940068493150685e-07, "loss": 0.0347, "step": 1204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1854.0, "completions/max_terminated_length": 1854.0, "completions/mean_length": 254.9107208251953, "completions/mean_terminated_length": 254.9107208251953, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.616551399230957, "epoch": 8.253424657534246, "frac_reward_zero_std": 0.2142857313156128, "grad_norm": 1.5463155248867395, "kl": 0.703364908695221, "learning_rate": 2.9383561643835614e-07, "loss": 0.0466, "num_tokens": 13065770.0, "reward": 0.8789932131767273, "reward_std": 0.05116238817572594, "rewards/check_gptzero_func/mean": 0.8789931535720825, "rewards/check_gptzero_func/std": 0.2640126347541809, "sampling/importance_sampling_ratio/max": 1.4664005041122437, "sampling/importance_sampling_ratio/mean": 1.0004570484161377, "sampling/importance_sampling_ratio/min": 0.482334166765213, "sampling/sampling_logp_difference/max": 0.7291181087493896, "sampling/sampling_logp_difference/mean": 0.015047481283545494, "step": 1205 }, { "clip_ratio/high_max": 0.008139104582369328, "clip_ratio/high_mean": 0.003493484342470765, "clip_ratio/low_mean": 0.003615652909502387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007109136786311865, "entropy": 0.6165398359298706, "epoch": 8.26027397260274, "grad_norm": 1.4160550095881914, "kl": 0.7049447298049927, "learning_rate": 2.936643835616438e-07, "loss": 0.0383, "step": 1206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1519.0, "completions/mean_length": 209.7857208251953, "completions/mean_terminated_length": 159.05453491210938, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.25127819180488586, "epoch": 8.267123287671232, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4452335162521264, "kl": 0.8886958360671997, "learning_rate": 2.9349315068493144e-07, "loss": -0.0786, "num_tokens": 13082708.0, "reward": 0.9256466627120972, "reward_std": 0.04784844443202019, "rewards/check_gptzero_func/mean": 0.9256466627120972, "rewards/check_gptzero_func/std": 0.22111472487449646, "sampling/importance_sampling_ratio/max": 1.3325012922286987, "sampling/importance_sampling_ratio/mean": 1.0000073909759521, "sampling/importance_sampling_ratio/min": 0.5851320028305054, "sampling/sampling_logp_difference/max": 0.5359177589416504, "sampling/sampling_logp_difference/mean": 0.007139305118471384, "step": 1207 }, { "clip_ratio/high_max": 0.020126724615693092, "clip_ratio/high_mean": 0.004126218147575855, "clip_ratio/low_mean": 0.0024319165386259556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006558134686201811, "entropy": 0.25203222036361694, "epoch": 8.273972602739725, "grad_norm": 1.1273258132441168, "kl": 0.8636847734451294, "learning_rate": 2.933219178082192e-07, "loss": -0.0851, "step": 1208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2572.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 191.42857360839844, "completions/mean_terminated_length": 191.42857360839844, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.49676766991615295, "epoch": 8.280821917808218, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7710699900779223, "kl": 0.9753131866455078, "learning_rate": 2.9315068493150685e-07, "loss": 0.0071, "num_tokens": 13097974.0, "reward": 0.8919949531555176, "reward_std": 0.020238779485225677, "rewards/check_gptzero_func/mean": 0.8919948935508728, "rewards/check_gptzero_func/std": 0.252549409866333, "sampling/importance_sampling_ratio/max": 1.4563497304916382, "sampling/importance_sampling_ratio/mean": 0.9995478391647339, "sampling/importance_sampling_ratio/min": 0.6856617331504822, "sampling/sampling_logp_difference/max": 0.37737083435058594, "sampling/sampling_logp_difference/mean": 0.011478913947939873, "step": 1209 }, { "clip_ratio/high_max": 0.012621915899217129, "clip_ratio/high_mean": 0.0036464461591094732, "clip_ratio/low_mean": 0.0023790602572262287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006025507114827633, "entropy": 0.4968926012516022, "epoch": 8.287671232876713, "grad_norm": 1.6137252809803957, "kl": 0.9690868258476257, "learning_rate": 2.929794520547945e-07, "loss": -0.0018, "step": 1210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 245.96429443359375, "completions/mean_terminated_length": 195.89089965820312, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5093806385993958, "epoch": 8.294520547945206, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.3497379191663008, "kl": 0.8832683563232422, "learning_rate": 2.928082191780822e-07, "loss": 0.2778, "num_tokens": 13116682.0, "reward": 0.9361160397529602, "reward_std": 0.024025866761803627, "rewards/check_gptzero_func/mean": 0.9361160397529602, "rewards/check_gptzero_func/std": 0.18909521400928497, "sampling/importance_sampling_ratio/max": 1.5521422624588013, "sampling/importance_sampling_ratio/mean": 1.0001589059829712, "sampling/importance_sampling_ratio/min": 0.7324614524841309, "sampling/sampling_logp_difference/max": 0.43963611125946045, "sampling/sampling_logp_difference/mean": 0.010782361961901188, "step": 1211 }, { "clip_ratio/high_max": 0.012858979403972626, "clip_ratio/high_mean": 0.003937883302569389, "clip_ratio/low_mean": 0.004888999275863171, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00882688257843256, "entropy": 0.5099411010742188, "epoch": 8.301369863013699, "grad_norm": 1.186977455129808, "kl": 0.8824281096458435, "learning_rate": 2.9263698630136985e-07, "loss": 0.2714, "step": 1212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 671.0, "completions/max_terminated_length": 671.0, "completions/mean_length": 81.64286041259766, "completions/mean_terminated_length": 81.64286041259766, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.2752818167209625, "epoch": 8.308219178082192, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.653563512133936, "kl": 1.1496559381484985, "learning_rate": 2.924657534246575e-07, "loss": 0.082, "num_tokens": 13125868.0, "reward": 0.9529809951782227, "reward_std": 0.037735797464847565, "rewards/check_gptzero_func/mean": 0.9529808759689331, "rewards/check_gptzero_func/std": 0.1624165177345276, "sampling/importance_sampling_ratio/max": 1.1713240146636963, "sampling/importance_sampling_ratio/mean": 0.9991198182106018, "sampling/importance_sampling_ratio/min": 0.7724288105964661, "sampling/sampling_logp_difference/max": 0.25821542739868164, "sampling/sampling_logp_difference/mean": 0.0064475503750145435, "step": 1213 }, { "clip_ratio/high_max": 0.0065170167945325375, "clip_ratio/high_mean": 0.001851300010457635, "clip_ratio/low_mean": 0.00466894032433629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006520240567624569, "entropy": 0.2758827805519104, "epoch": 8.315068493150685, "grad_norm": 1.320323898949069, "kl": 1.059400200843811, "learning_rate": 2.9229452054794515e-07, "loss": 0.0753, "step": 1214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 2108.0, "completions/mean_length": 291.2321472167969, "completions/mean_terminated_length": 137.90565490722656, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.4754503667354584, "epoch": 8.321917808219178, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.2273459562955156, "kl": 0.7429118156433105, "learning_rate": 2.9212328767123286e-07, "loss": 0.1579, "num_tokens": 13146851.0, "reward": 0.9159601926803589, "reward_std": 0.043996792286634445, "rewards/check_gptzero_func/mean": 0.9159601926803589, "rewards/check_gptzero_func/std": 0.2185737043619156, "sampling/importance_sampling_ratio/max": 1.3610246181488037, "sampling/importance_sampling_ratio/mean": 1.0004565715789795, "sampling/importance_sampling_ratio/min": 0.661616325378418, "sampling/sampling_logp_difference/max": 0.413069486618042, "sampling/sampling_logp_difference/mean": 0.011314122937619686, "step": 1215 }, { "clip_ratio/high_max": 0.010204081423580647, "clip_ratio/high_mean": 0.00264321849681437, "clip_ratio/low_mean": 0.003943297546356916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006586515344679356, "entropy": 0.47643160820007324, "epoch": 8.32876712328767, "grad_norm": 1.1445252884335757, "kl": 0.7407418489456177, "learning_rate": 2.919520547945205e-07, "loss": 0.1522, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1902.0, "completions/mean_length": 297.2857360839844, "completions/mean_terminated_length": 248.14544677734375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.7426027059555054, "epoch": 8.335616438356164, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.3090501683035183, "kl": 0.5983628034591675, "learning_rate": 2.917808219178082e-07, "loss": 0.0648, "num_tokens": 13168273.0, "reward": 0.8231129050254822, "reward_std": 0.07282445579767227, "rewards/check_gptzero_func/mean": 0.8231129050254822, "rewards/check_gptzero_func/std": 0.32703447341918945, "sampling/importance_sampling_ratio/max": 1.489332675933838, "sampling/importance_sampling_ratio/mean": 1.0000898838043213, "sampling/importance_sampling_ratio/min": 0.681842565536499, "sampling/sampling_logp_difference/max": 0.39832818508148193, "sampling/sampling_logp_difference/mean": 0.01737845502793789, "step": 1217 }, { "clip_ratio/high_max": 0.013392857275903225, "clip_ratio/high_mean": 0.0038803634233772755, "clip_ratio/low_mean": 0.00363711710087955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007517480291426182, "entropy": 0.7433881163597107, "epoch": 8.342465753424657, "grad_norm": 1.2263116520244586, "kl": 0.5981301069259644, "learning_rate": 2.916095890410959e-07, "loss": 0.0579, "step": 1218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1512.0, "completions/max_terminated_length": 1512.0, "completions/mean_length": 251.0535888671875, "completions/mean_terminated_length": 251.0535888671875, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 0.5450449585914612, "epoch": 8.349315068493151, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.5339245587347274, "kl": 0.8347800970077515, "learning_rate": 2.9143835616438356e-07, "loss": -0.0118, "num_tokens": 13187304.0, "reward": 0.9003098607063293, "reward_std": 0.019432535395026207, "rewards/check_gptzero_func/mean": 0.9003098607063293, "rewards/check_gptzero_func/std": 0.19960595667362213, "sampling/importance_sampling_ratio/max": 1.3937371969223022, "sampling/importance_sampling_ratio/mean": 1.0000008344650269, "sampling/importance_sampling_ratio/min": 0.6793226003646851, "sampling/sampling_logp_difference/max": 0.3866591453552246, "sampling/sampling_logp_difference/mean": 0.014828444458544254, "step": 1219 }, { "clip_ratio/high_max": 0.011377489194273949, "clip_ratio/high_mean": 0.004679120145738125, "clip_ratio/low_mean": 0.0028311212081462145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007510241121053696, "entropy": 0.5466564893722534, "epoch": 8.356164383561644, "grad_norm": 1.4130764888565175, "kl": 0.8319071531295776, "learning_rate": 2.912671232876712e-07, "loss": -0.0198, "step": 1220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1686.0, "completions/max_terminated_length": 1686.0, "completions/mean_length": 165.10714721679688, "completions/mean_terminated_length": 165.10714721679688, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.4436766803264618, "epoch": 8.363013698630137, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.8635643454789461, "kl": 0.8749558329582214, "learning_rate": 2.9109589041095887e-07, "loss": -0.0142, "num_tokens": 13201166.0, "reward": 0.9380283355712891, "reward_std": 0.017337799072265625, "rewards/check_gptzero_func/mean": 0.9380282759666443, "rewards/check_gptzero_func/std": 0.18652042746543884, "sampling/importance_sampling_ratio/max": 1.308778166770935, "sampling/importance_sampling_ratio/mean": 0.9999651312828064, "sampling/importance_sampling_ratio/min": 0.6772356033325195, "sampling/sampling_logp_difference/max": 0.3897360563278198, "sampling/sampling_logp_difference/mean": 0.011491657234728336, "step": 1221 }, { "clip_ratio/high_max": 0.011706948280334473, "clip_ratio/high_mean": 0.0038209385238587856, "clip_ratio/low_mean": 0.003390998113900423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007211936172097921, "entropy": 0.4431186616420746, "epoch": 8.36986301369863, "grad_norm": 1.6432337125104226, "kl": 0.8759810328483582, "learning_rate": 2.9092465753424657e-07, "loss": -0.0237, "step": 1222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1774.0, "completions/max_terminated_length": 1774.0, "completions/mean_length": 276.75, "completions/mean_terminated_length": 276.75, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.5157331228256226, "epoch": 8.376712328767123, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.473131590047212, "kl": 0.8907580971717834, "learning_rate": 2.907534246575342e-07, "loss": -0.0026, "num_tokens": 13221628.0, "reward": 0.804452121257782, "reward_std": 0.06960632652044296, "rewards/check_gptzero_func/mean": 0.8044520616531372, "rewards/check_gptzero_func/std": 0.3312378525733948, "sampling/importance_sampling_ratio/max": 1.6220511198043823, "sampling/importance_sampling_ratio/mean": 0.9997957944869995, "sampling/importance_sampling_ratio/min": 0.5744162797927856, "sampling/sampling_logp_difference/max": 0.5544009208679199, "sampling/sampling_logp_difference/mean": 0.012293885461986065, "step": 1223 }, { "clip_ratio/high_max": 0.011466617695987225, "clip_ratio/high_mean": 0.0041763014160096645, "clip_ratio/low_mean": 0.0017363644437864423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005912666209042072, "entropy": 0.5164910554885864, "epoch": 8.383561643835616, "grad_norm": 1.4144412454548452, "kl": 0.8933677673339844, "learning_rate": 2.9058219178082187e-07, "loss": -0.0103, "step": 1224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2036.0, "completions/max_terminated_length": 2036.0, "completions/mean_length": 252.75001525878906, "completions/mean_terminated_length": 252.75001525878906, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.5790081024169922, "epoch": 8.39041095890411, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.633828134145484, "kl": 1.1073614358901978, "learning_rate": 2.9041095890410957e-07, "loss": -0.0441, "num_tokens": 13240768.0, "reward": 0.9406851530075073, "reward_std": 0.025960169732570648, "rewards/check_gptzero_func/mean": 0.9406851530075073, "rewards/check_gptzero_func/std": 0.13727489113807678, "sampling/importance_sampling_ratio/max": 1.4148856401443481, "sampling/importance_sampling_ratio/mean": 1.0000524520874023, "sampling/importance_sampling_ratio/min": 0.6294134855270386, "sampling/sampling_logp_difference/max": 0.4629669189453125, "sampling/sampling_logp_difference/mean": 0.012823487631976604, "step": 1225 }, { "clip_ratio/high_max": 0.007656211499124765, "clip_ratio/high_mean": 0.002893943339586258, "clip_ratio/low_mean": 0.0016628159210085869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004556759260594845, "entropy": 0.5809217095375061, "epoch": 8.397260273972602, "grad_norm": 1.3411572479564904, "kl": 0.9018777012825012, "learning_rate": 2.902397260273973e-07, "loss": -0.0516, "step": 1226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2436.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 198.33929443359375, "completions/mean_terminated_length": 198.33929443359375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.4615054130554199, "epoch": 8.404109589041095, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.505360452356767, "kl": 0.9317063689231873, "learning_rate": 2.9006849315068493e-07, "loss": 0.1719, "num_tokens": 13256421.0, "reward": 0.9429580569267273, "reward_std": 0.022781142964959145, "rewards/check_gptzero_func/mean": 0.9429579973220825, "rewards/check_gptzero_func/std": 0.1496720314025879, "sampling/importance_sampling_ratio/max": 1.4174528121948242, "sampling/importance_sampling_ratio/mean": 1.0004174709320068, "sampling/importance_sampling_ratio/min": 0.5959854125976562, "sampling/sampling_logp_difference/max": 0.5175390243530273, "sampling/sampling_logp_difference/mean": 0.011534995399415493, "step": 1227 }, { "clip_ratio/high_max": 0.013767208904027939, "clip_ratio/high_mean": 0.004785714205354452, "clip_ratio/low_mean": 0.004698152653872967, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.009483867324888706, "entropy": 0.46138185262680054, "epoch": 8.41095890410959, "grad_norm": 1.319978322072606, "kl": 0.9228838682174683, "learning_rate": 2.898972602739726e-07, "loss": 0.1646, "step": 1228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1732.0, "completions/mean_length": 242.0178680419922, "completions/mean_terminated_length": 191.8727264404297, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.5639949440956116, "epoch": 8.417808219178083, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.4084720092368617, "kl": 1.062002420425415, "learning_rate": 2.897260273972603e-07, "loss": 0.1532, "num_tokens": 13274526.0, "reward": 0.8901000022888184, "reward_std": 0.039240770041942596, "rewards/check_gptzero_func/mean": 0.8900999426841736, "rewards/check_gptzero_func/std": 0.24965864419937134, "sampling/importance_sampling_ratio/max": 1.3275219202041626, "sampling/importance_sampling_ratio/mean": 1.000369668006897, "sampling/importance_sampling_ratio/min": 0.7107278108596802, "sampling/sampling_logp_difference/max": 0.34146571159362793, "sampling/sampling_logp_difference/mean": 0.012650269083678722, "step": 1229 }, { "clip_ratio/high_max": 0.009604190476238728, "clip_ratio/high_mean": 0.0037250507157295942, "clip_ratio/low_mean": 0.0022872339468449354, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00601228466257453, "entropy": 0.5662215948104858, "epoch": 8.424657534246576, "grad_norm": 1.2877939630021693, "kl": 1.060799479484558, "learning_rate": 2.8955479452054793e-07, "loss": 0.1459, "step": 1230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1083.0, "completions/max_terminated_length": 1083.0, "completions/mean_length": 105.92857360839844, "completions/mean_terminated_length": 105.92857360839844, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.4628690183162689, "epoch": 8.431506849315069, "frac_reward_zero_std": 0.0, "grad_norm": 1.8658157184187754, "kl": 1.1922672986984253, "learning_rate": 2.893835616438356e-07, "loss": -0.0036, "num_tokens": 13284950.0, "reward": 0.9269139766693115, "reward_std": 0.01847725920379162, "rewards/check_gptzero_func/mean": 0.9269139170646667, "rewards/check_gptzero_func/std": 0.21846452355384827, "sampling/importance_sampling_ratio/max": 1.2653300762176514, "sampling/importance_sampling_ratio/mean": 0.9994160532951355, "sampling/importance_sampling_ratio/min": 0.7205344438552856, "sampling/sampling_logp_difference/max": 0.3277621269226074, "sampling/sampling_logp_difference/mean": 0.010442272759974003, "step": 1231 }, { "clip_ratio/high_max": 0.022736500948667526, "clip_ratio/high_mean": 0.0045042335987091064, "clip_ratio/low_mean": 0.0037512225098907948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.008255456574261189, "entropy": 0.4667692184448242, "epoch": 8.438356164383562, "grad_norm": 1.6149551255361554, "kl": 1.1855138540267944, "learning_rate": 2.892123287671233e-07, "loss": -0.0123, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2139.0, "completions/max_terminated_length": 2139.0, "completions/mean_length": 256.8035888671875, "completions/mean_terminated_length": 256.8035888671875, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.37264564633369446, "epoch": 8.445205479452055, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.5044513208406094, "kl": 0.9025307893753052, "learning_rate": 2.8904109589041093e-07, "loss": -0.0136, "num_tokens": 13304711.0, "reward": 0.9380730390548706, "reward_std": 0.02552965097129345, "rewards/check_gptzero_func/mean": 0.9380730390548706, "rewards/check_gptzero_func/std": 0.15446530282497406, "sampling/importance_sampling_ratio/max": 1.3809764385223389, "sampling/importance_sampling_ratio/mean": 0.999517023563385, "sampling/importance_sampling_ratio/min": 0.7402400374412537, "sampling/sampling_logp_difference/max": 0.32279086112976074, "sampling/sampling_logp_difference/mean": 0.00951756164431572, "step": 1233 }, { "clip_ratio/high_max": 0.012535910122096539, "clip_ratio/high_mean": 0.003790787188336253, "clip_ratio/low_mean": 0.001572542474605143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005363330245018005, "entropy": 0.37233757972717285, "epoch": 8.452054794520548, "grad_norm": 1.3465383070718688, "kl": 0.9010539054870605, "learning_rate": 2.8886986301369864e-07, "loss": -0.0213, "step": 1234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1724.0, "completions/max_terminated_length": 1724.0, "completions/mean_length": 183.94644165039062, "completions/mean_terminated_length": 183.94644165039062, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.473236083984375, "epoch": 8.45890410958904, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.6606099518062971, "kl": 1.0612026453018188, "learning_rate": 2.886986301369863e-07, "loss": 0.0986, "num_tokens": 13320056.0, "reward": 0.8782122135162354, "reward_std": 0.06347829103469849, "rewards/check_gptzero_func/mean": 0.8782121539115906, "rewards/check_gptzero_func/std": 0.26683586835861206, "sampling/importance_sampling_ratio/max": 1.3796802759170532, "sampling/importance_sampling_ratio/mean": 0.9999917149543762, "sampling/importance_sampling_ratio/min": 0.5370863676071167, "sampling/sampling_logp_difference/max": 0.6215963363647461, "sampling/sampling_logp_difference/mean": 0.011568459682166576, "step": 1235 }, { "clip_ratio/high_max": 0.012384653091430664, "clip_ratio/high_mean": 0.0036982186138629913, "clip_ratio/low_mean": 0.0037346791941672564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007432898040860891, "entropy": 0.4736156165599823, "epoch": 8.465753424657533, "grad_norm": 1.4631927512349878, "kl": 1.0484405755996704, "learning_rate": 2.88527397260274e-07, "loss": 0.0904, "step": 1236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1644.0, "completions/mean_length": 280.4821472167969, "completions/mean_terminated_length": 231.03636169433594, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.6033286452293396, "epoch": 8.472602739726028, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 2.095746219313501, "kl": 0.8508370518684387, "learning_rate": 2.8835616438356164e-07, "loss": 0.1329, "num_tokens": 13340753.0, "reward": 0.8523535132408142, "reward_std": 0.07200168073177338, "rewards/check_gptzero_func/mean": 0.8523535132408142, "rewards/check_gptzero_func/std": 0.2851071357727051, "sampling/importance_sampling_ratio/max": 1.3655023574829102, "sampling/importance_sampling_ratio/mean": 0.999742329120636, "sampling/importance_sampling_ratio/min": 0.6821480989456177, "sampling/sampling_logp_difference/max": 0.3825085163116455, "sampling/sampling_logp_difference/mean": 0.014117459766566753, "step": 1237 }, { "clip_ratio/high_max": 0.007835821248590946, "clip_ratio/high_mean": 0.003466365858912468, "clip_ratio/low_mean": 0.0016685326118022203, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005134898703545332, "entropy": 0.6061493754386902, "epoch": 8.479452054794521, "grad_norm": 1.3142207030538473, "kl": 0.6892843246459961, "learning_rate": 2.881849315068493e-07, "loss": 0.1252, "step": 1238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 156.5357208251953, "completions/mean_terminated_length": 156.5357208251953, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.657002329826355, "epoch": 8.486301369863014, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.9172026564224263, "kl": 0.9686049222946167, "learning_rate": 2.88013698630137e-07, "loss": -0.0715, "num_tokens": 13354403.0, "reward": 0.9138543605804443, "reward_std": 0.05904483050107956, "rewards/check_gptzero_func/mean": 0.9138543009757996, "rewards/check_gptzero_func/std": 0.22966104745864868, "sampling/importance_sampling_ratio/max": 1.5118381977081299, "sampling/importance_sampling_ratio/mean": 0.9999306797981262, "sampling/importance_sampling_ratio/min": 0.7473167181015015, "sampling/sampling_logp_difference/max": 0.4133262634277344, "sampling/sampling_logp_difference/mean": 0.014375046826899052, "step": 1239 }, { "clip_ratio/high_max": 0.012865496799349785, "clip_ratio/high_mean": 0.004227799363434315, "clip_ratio/low_mean": 0.0025396852288395166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006767484359443188, "entropy": 0.6576915979385376, "epoch": 8.493150684931507, "grad_norm": 1.7185267519154077, "kl": 0.9447323083877563, "learning_rate": 2.8784246575342465e-07, "loss": -0.0803, "step": 1240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1571.0, "completions/max_terminated_length": 1571.0, "completions/mean_length": 143.23214721679688, "completions/mean_terminated_length": 143.23214721679688, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.3564545810222626, "epoch": 8.5, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.5103855978100118, "kl": 0.896148681640625, "learning_rate": 2.876712328767123e-07, "loss": 0.0225, "num_tokens": 13368040.0, "reward": 0.8817011713981628, "reward_std": 0.020891426131129265, "rewards/check_gptzero_func/mean": 0.8817011117935181, "rewards/check_gptzero_func/std": 0.2764611840248108, "sampling/importance_sampling_ratio/max": 1.3969224691390991, "sampling/importance_sampling_ratio/mean": 0.9998841285705566, "sampling/importance_sampling_ratio/min": 0.7875920534133911, "sampling/sampling_logp_difference/max": 0.3342716693878174, "sampling/sampling_logp_difference/mean": 0.007319167722016573, "step": 1241 }, { "clip_ratio/high_max": 0.01035306602716446, "clip_ratio/high_mean": 0.0023711365647614002, "clip_ratio/low_mean": 0.0016602950636297464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00403143186122179, "entropy": 0.3582352101802826, "epoch": 8.506849315068493, "grad_norm": 1.4126435350229039, "kl": 0.8930253386497498, "learning_rate": 2.8749999999999995e-07, "loss": 0.0151, "step": 1242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 142.07144165039062, "completions/mean_terminated_length": 142.07144165039062, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.315469890832901, "epoch": 8.513698630136986, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.730712503790661, "kl": 1.030104637145996, "learning_rate": 2.873287671232877e-07, "loss": 0.1626, "num_tokens": 13380610.0, "reward": 0.9199000000953674, "reward_std": 0.06273525953292847, "rewards/check_gptzero_func/mean": 0.9198999404907227, "rewards/check_gptzero_func/std": 0.19705118238925934, "sampling/importance_sampling_ratio/max": 1.2695696353912354, "sampling/importance_sampling_ratio/mean": 0.9996982216835022, "sampling/importance_sampling_ratio/min": 0.6774014830589294, "sampling/sampling_logp_difference/max": 0.38949108123779297, "sampling/sampling_logp_difference/mean": 0.00787105318158865, "step": 1243 }, { "clip_ratio/high_max": 0.010961906984448433, "clip_ratio/high_mean": 0.0018693169113248587, "clip_ratio/low_mean": 0.0034119407646358013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005281257908791304, "entropy": 0.31515738368034363, "epoch": 8.520547945205479, "grad_norm": 1.5311111713199514, "kl": 1.0335452556610107, "learning_rate": 2.8715753424657535e-07, "loss": 0.1542, "step": 1244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1761.0, "completions/mean_length": 419.5000305175781, "completions/mean_terminated_length": 323.9259338378906, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.8093037009239197, "epoch": 8.527397260273972, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 3.325722533902191, "kl": 1.0191437005996704, "learning_rate": 2.86986301369863e-07, "loss": 0.2182, "num_tokens": 13408520.0, "reward": 0.822748601436615, "reward_std": 0.06083981692790985, "rewards/check_gptzero_func/mean": 0.822748601436615, "rewards/check_gptzero_func/std": 0.2822434902191162, "sampling/importance_sampling_ratio/max": 1.5091429948806763, "sampling/importance_sampling_ratio/mean": 0.9998707175254822, "sampling/importance_sampling_ratio/min": 0.6964145302772522, "sampling/sampling_logp_difference/max": 0.4115419387817383, "sampling/sampling_logp_difference/mean": 0.017999893054366112, "step": 1245 }, { "clip_ratio/high_max": 0.004661365412175655, "clip_ratio/high_mean": 0.0026872786693274975, "clip_ratio/low_mean": 0.0012258208589628339, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0039130994118750095, "entropy": 0.810450553894043, "epoch": 8.534246575342467, "grad_norm": 1.1788321198782832, "kl": 0.6810252070426941, "learning_rate": 2.8681506849315065e-07, "loss": 0.2115, "step": 1246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1987.0, "completions/mean_length": 262.6071472167969, "completions/mean_terminated_length": 161.22222900390625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.40271395444869995, "epoch": 8.54109589041096, "frac_reward_zero_std": 0.0, "grad_norm": 1.1357082922283133, "kl": 0.7331116795539856, "learning_rate": 2.8664383561643836e-07, "loss": 0.307, "num_tokens": 13427800.0, "reward": 0.9071274995803833, "reward_std": 0.04200298711657524, "rewards/check_gptzero_func/mean": 0.9071274399757385, "rewards/check_gptzero_func/std": 0.2587879002094269, "sampling/importance_sampling_ratio/max": 1.4750303030014038, "sampling/importance_sampling_ratio/mean": 0.9996682405471802, "sampling/importance_sampling_ratio/min": 0.7105809450149536, "sampling/sampling_logp_difference/max": 0.38867855072021484, "sampling/sampling_logp_difference/mean": 0.008992025628685951, "step": 1247 }, { "clip_ratio/high_max": 0.011230346746742725, "clip_ratio/high_mean": 0.0026482020039111376, "clip_ratio/low_mean": 0.001734053366817534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004382255487143993, "entropy": 0.4045446217060089, "epoch": 8.547945205479452, "grad_norm": 1.0809399237627275, "kl": 0.7309139966964722, "learning_rate": 2.86472602739726e-07, "loss": 0.3014, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1970.0, "completions/max_terminated_length": 1970.0, "completions/mean_length": 234.50001525878906, "completions/mean_terminated_length": 234.50001525878906, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.6766872406005859, "epoch": 8.554794520547945, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.6306551202623947, "kl": 0.8266690373420715, "learning_rate": 2.8630136986301366e-07, "loss": 0.0221, "num_tokens": 13445664.0, "reward": 0.8617356419563293, "reward_std": 0.026918264105916023, "rewards/check_gptzero_func/mean": 0.8617356419563293, "rewards/check_gptzero_func/std": 0.26333320140838623, "sampling/importance_sampling_ratio/max": 1.4731721878051758, "sampling/importance_sampling_ratio/mean": 1.0001227855682373, "sampling/importance_sampling_ratio/min": 0.6389775276184082, "sampling/sampling_logp_difference/max": 0.44788599014282227, "sampling/sampling_logp_difference/mean": 0.015820255503058434, "step": 1249 }, { "clip_ratio/high_max": 0.008600651286542416, "clip_ratio/high_mean": 0.004253643099218607, "clip_ratio/low_mean": 0.00263237371109426, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0068860165774822235, "entropy": 0.6775392889976501, "epoch": 8.561643835616438, "grad_norm": 1.479416270031903, "kl": 0.8202248811721802, "learning_rate": 2.8613013698630136e-07, "loss": 0.0136, "step": 1250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2374.0, "completions/mean_length": 371.5714416503906, "completions/mean_terminated_length": 323.78179931640625, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.6303897500038147, "epoch": 8.568493150684931, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.2360588842789306, "kl": 0.7087583541870117, "learning_rate": 2.85958904109589e-07, "loss": 0.0336, "num_tokens": 13471608.0, "reward": 0.8505813479423523, "reward_std": 0.0717533528804779, "rewards/check_gptzero_func/mean": 0.8505812883377075, "rewards/check_gptzero_func/std": 0.2691706120967865, "sampling/importance_sampling_ratio/max": 1.5782508850097656, "sampling/importance_sampling_ratio/mean": 0.9999948740005493, "sampling/importance_sampling_ratio/min": 0.6370593905448914, "sampling/sampling_logp_difference/max": 0.4563171863555908, "sampling/sampling_logp_difference/mean": 0.015607435256242752, "step": 1251 }, { "clip_ratio/high_max": 0.007741167210042477, "clip_ratio/high_mean": 0.004370052367448807, "clip_ratio/low_mean": 0.0027582074981182814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007128260098397732, "entropy": 0.6317011713981628, "epoch": 8.575342465753424, "grad_norm": 1.1254092856335598, "kl": 0.7066395878791809, "learning_rate": 2.857876712328767e-07, "loss": 0.0271, "step": 1252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857164144516, "completions/max_length": 3000.0, "completions/max_terminated_length": 1696.0, "completions/mean_length": 440.83929443359375, "completions/mean_terminated_length": 189.94117736816406, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.4498181939125061, "epoch": 8.582191780821917, "frac_reward_zero_std": 0.0, "grad_norm": 0.756507220351809, "kl": 0.6091071367263794, "learning_rate": 2.8561643835616437e-07, "loss": 0.2751, "num_tokens": 13501065.0, "reward": 0.9038406610488892, "reward_std": 0.05785595625638962, "rewards/check_gptzero_func/mean": 0.9038406014442444, "rewards/check_gptzero_func/std": 0.21287937462329865, "sampling/importance_sampling_ratio/max": 1.576637864112854, "sampling/importance_sampling_ratio/mean": 1.0002764463424683, "sampling/importance_sampling_ratio/min": 0.6632490158081055, "sampling/sampling_logp_difference/max": 0.4552946090698242, "sampling/sampling_logp_difference/mean": 0.008952671661973, "step": 1253 }, { "clip_ratio/high_max": 0.00689914682880044, "clip_ratio/high_mean": 0.0012499996228143573, "clip_ratio/low_mean": 0.0017446157289668918, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002994615351781249, "entropy": 0.45026466250419617, "epoch": 8.58904109589041, "grad_norm": 0.7023620759887087, "kl": 0.6054006218910217, "learning_rate": 2.8544520547945207e-07, "loss": 0.2712, "step": 1254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2725.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 260.0714416503906, "completions/mean_terminated_length": 260.0714416503906, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.5084091424942017, "epoch": 8.595890410958905, "frac_reward_zero_std": 0.1785714328289032, "grad_norm": 1.6920831672594805, "kl": 1.487350344657898, "learning_rate": 2.852739726027397e-07, "loss": 0.1782, "num_tokens": 13520139.0, "reward": 0.8700066208839417, "reward_std": 0.0781271755695343, "rewards/check_gptzero_func/mean": 0.8700065612792969, "rewards/check_gptzero_func/std": 0.26243799924850464, "sampling/importance_sampling_ratio/max": 1.398794412612915, "sampling/importance_sampling_ratio/mean": 1.000324010848999, "sampling/importance_sampling_ratio/min": 0.567655622959137, "sampling/sampling_logp_difference/max": 0.5662403106689453, "sampling/sampling_logp_difference/mean": 0.012700277380645275, "step": 1255 }, { "clip_ratio/high_max": 0.006132461130619049, "clip_ratio/high_mean": 0.0022087753750383854, "clip_ratio/low_mean": 0.0022872122935950756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004495987202972174, "entropy": 0.5092261433601379, "epoch": 8.602739726027398, "grad_norm": 1.489560466900706, "kl": 1.2265781164169312, "learning_rate": 2.8510273972602737e-07, "loss": 0.1699, "step": 1256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1897.0, "completions/mean_length": 295.5, "completions/mean_terminated_length": 246.3272705078125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5956727862358093, "epoch": 8.60958904109589, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3289124848918303, "kl": 0.7608214616775513, "learning_rate": 2.849315068493151e-07, "loss": 0.1707, "num_tokens": 13541351.0, "reward": 0.9239935874938965, "reward_std": 0.022086866199970245, "rewards/check_gptzero_func/mean": 0.9239935278892517, "rewards/check_gptzero_func/std": 0.16471506655216217, "sampling/importance_sampling_ratio/max": 1.6009150743484497, "sampling/importance_sampling_ratio/mean": 1.0000883340835571, "sampling/importance_sampling_ratio/min": 0.7129825949668884, "sampling/sampling_logp_difference/max": 0.47057533264160156, "sampling/sampling_logp_difference/mean": 0.013582575134932995, "step": 1257 }, { "clip_ratio/high_max": 0.010544815100729465, "clip_ratio/high_mean": 0.003452175995334983, "clip_ratio/low_mean": 0.0020517623052001, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0055039385333657265, "entropy": 0.5975200533866882, "epoch": 8.616438356164384, "grad_norm": 1.2165542696960467, "kl": 0.7574183344841003, "learning_rate": 2.847602739726027e-07, "loss": 0.1642, "step": 1258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1944.0, "completions/max_terminated_length": 1944.0, "completions/mean_length": 291.9285888671875, "completions/mean_terminated_length": 291.9285888671875, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "entropy": 0.675785481929779, "epoch": 8.623287671232877, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.4603236304621248, "kl": 1.1813602447509766, "learning_rate": 2.845890410958904e-07, "loss": 0.1079, "num_tokens": 13562433.0, "reward": 0.8942974805831909, "reward_std": 0.06714069843292236, "rewards/check_gptzero_func/mean": 0.8942974805831909, "rewards/check_gptzero_func/std": 0.2269466668367386, "sampling/importance_sampling_ratio/max": 1.4115486145019531, "sampling/importance_sampling_ratio/mean": 1.0006427764892578, "sampling/importance_sampling_ratio/min": 0.6007027626037598, "sampling/sampling_logp_difference/max": 0.5096549987792969, "sampling/sampling_logp_difference/mean": 0.013874277472496033, "step": 1259 }, { "clip_ratio/high_max": 0.010103845037519932, "clip_ratio/high_mean": 0.0028661389369517565, "clip_ratio/low_mean": 0.0022472210694104433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0051133595407009125, "entropy": 0.677144467830658, "epoch": 8.63013698630137, "grad_norm": 1.3397543283003164, "kl": 1.1517198085784912, "learning_rate": 2.84417808219178e-07, "loss": 0.1002, "step": 1260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1980.0, "completions/mean_length": 509.8035888671875, "completions/mean_terminated_length": 417.5740661621094, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.7595158815383911, "epoch": 8.636986301369863, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.0943888493045364, "kl": 0.7486649751663208, "learning_rate": 2.842465753424658e-07, "loss": 0.0203, "num_tokens": 13595436.0, "reward": 0.7790180444717407, "reward_std": 0.07481761276721954, "rewards/check_gptzero_func/mean": 0.779017984867096, "rewards/check_gptzero_func/std": 0.3163590729236603, "sampling/importance_sampling_ratio/max": 1.6952433586120605, "sampling/importance_sampling_ratio/mean": 1.0000807046890259, "sampling/importance_sampling_ratio/min": 0.6181741952896118, "sampling/sampling_logp_difference/max": 0.5278263092041016, "sampling/sampling_logp_difference/mean": 0.018430953845381737, "step": 1261 }, { "clip_ratio/high_max": 0.006332676392048597, "clip_ratio/high_mean": 0.0034990988206118345, "clip_ratio/low_mean": 0.001998328370973468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005497428122907877, "entropy": 0.7592921853065491, "epoch": 8.643835616438356, "grad_norm": 1.040100874479838, "kl": 0.7465314269065857, "learning_rate": 2.8407534246575343e-07, "loss": 0.0146, "step": 1262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 228.19644165039062, "completions/mean_terminated_length": 125.53704071044922, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.5038853883743286, "epoch": 8.650684931506849, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.1983339761726297, "kl": 0.936257004737854, "learning_rate": 2.839041095890411e-07, "loss": 0.3452, "num_tokens": 13613259.0, "reward": 0.9358243942260742, "reward_std": 0.03584354370832443, "rewards/check_gptzero_func/mean": 0.9358243346214294, "rewards/check_gptzero_func/std": 0.16361437737941742, "sampling/importance_sampling_ratio/max": 1.3016048669815063, "sampling/importance_sampling_ratio/mean": 1.00005042552948, "sampling/importance_sampling_ratio/min": 0.7571290135383606, "sampling/sampling_logp_difference/max": 0.27822160720825195, "sampling/sampling_logp_difference/mean": 0.009637224487960339, "step": 1263 }, { "clip_ratio/high_max": 0.01297625619918108, "clip_ratio/high_mean": 0.002218787092715502, "clip_ratio/low_mean": 0.001825949177145958, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00404473626986146, "entropy": 0.5036876797676086, "epoch": 8.657534246575342, "grad_norm": 1.0689937930504008, "kl": 0.9329806566238403, "learning_rate": 2.837328767123288e-07, "loss": 0.3396, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2403.0, "completions/mean_length": 245.6428680419922, "completions/mean_terminated_length": 195.56362915039062, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.6672989726066589, "epoch": 8.664383561643836, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4623601345493362, "kl": 1.0136040449142456, "learning_rate": 2.8356164383561644e-07, "loss": 0.2866, "num_tokens": 13631825.0, "reward": 0.9373060464859009, "reward_std": 0.03188337758183479, "rewards/check_gptzero_func/mean": 0.9373059868812561, "rewards/check_gptzero_func/std": 0.1817995309829712, "sampling/importance_sampling_ratio/max": 1.3570585250854492, "sampling/importance_sampling_ratio/mean": 1.000018835067749, "sampling/importance_sampling_ratio/min": 0.5996053814888, "sampling/sampling_logp_difference/max": 0.5114835500717163, "sampling/sampling_logp_difference/mean": 0.013475869782269001, "step": 1265 }, { "clip_ratio/high_max": 0.009732360020279884, "clip_ratio/high_mean": 0.0024195704609155655, "clip_ratio/low_mean": 0.0017553148791193962, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004174885340034962, "entropy": 0.6677612066268921, "epoch": 8.67123287671233, "grad_norm": 17.585130870676704, "kl": 1.2493637800216675, "learning_rate": 2.833904109589041e-07, "loss": 0.2844, "step": 1266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285746216774, "completions/max_length": 3000.0, "completions/max_terminated_length": 1673.0, "completions/mean_length": 426.8035888671875, "completions/mean_terminated_length": 228.86538696289062, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.6825860142707825, "epoch": 8.678082191780822, "frac_reward_zero_std": 0.0, "grad_norm": 0.9775007333614494, "kl": 0.6211205720901489, "learning_rate": 2.8321917808219174e-07, "loss": 0.2459, "num_tokens": 13660674.0, "reward": 0.8899908661842346, "reward_std": 0.07928697764873505, "rewards/check_gptzero_func/mean": 0.8899908065795898, "rewards/check_gptzero_func/std": 0.24243207275867462, "sampling/importance_sampling_ratio/max": 1.6069681644439697, "sampling/importance_sampling_ratio/mean": 1.0003347396850586, "sampling/importance_sampling_ratio/min": 0.6254435181617737, "sampling/sampling_logp_difference/max": 0.4743492603302002, "sampling/sampling_logp_difference/mean": 0.013441347517073154, "step": 1267 }, { "clip_ratio/high_max": 0.008315367624163628, "clip_ratio/high_mean": 0.0023941651452332735, "clip_ratio/low_mean": 0.003623183351010084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006017348729074001, "entropy": 0.6815707087516785, "epoch": 8.684931506849315, "grad_norm": 0.9025542264697304, "kl": 0.6199932098388672, "learning_rate": 2.8304794520547944e-07, "loss": 0.2409, "step": 1268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 248.8035888671875, "completions/mean_terminated_length": 198.7818145751953, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.47281715273857117, "epoch": 8.691780821917808, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.5746354772099536, "kl": 0.9580740332603455, "learning_rate": 2.828767123287671e-07, "loss": 0.151, "num_tokens": 13679795.0, "reward": 0.914639949798584, "reward_std": 0.04815761372447014, "rewards/check_gptzero_func/mean": 0.9146398901939392, "rewards/check_gptzero_func/std": 0.20689374208450317, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9998430609703064, "sampling/importance_sampling_ratio/min": 0.730817973613739, "sampling/sampling_logp_difference/max": 0.7826442718505859, "sampling/sampling_logp_difference/mean": 0.011837459169328213, "step": 1269 }, { "clip_ratio/high_max": 0.009244992397725582, "clip_ratio/high_mean": 0.0030273518059402704, "clip_ratio/low_mean": 0.0025713033974170685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00559865590184927, "entropy": 0.47170862555503845, "epoch": 8.698630136986301, "grad_norm": 1.229469277854305, "kl": 0.9511370658874512, "learning_rate": 2.827054794520548e-07, "loss": 0.1454, "step": 1270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 2114.0, "completions/mean_length": 311.5714416503906, "completions/mean_terminated_length": 212.0, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.7201946377754211, "epoch": 8.705479452054794, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.7081014253095261, "kl": 1.5517469644546509, "learning_rate": 2.825342465753425e-07, "loss": -0.0028, "num_tokens": 13702295.0, "reward": 0.8959816694259644, "reward_std": 0.031836237758398056, "rewards/check_gptzero_func/mean": 0.8959816098213196, "rewards/check_gptzero_func/std": 0.22612860798835754, "sampling/importance_sampling_ratio/max": 1.588585376739502, "sampling/importance_sampling_ratio/mean": 0.9999755620956421, "sampling/importance_sampling_ratio/min": 0.6955755352973938, "sampling/sampling_logp_difference/max": 0.4628438949584961, "sampling/sampling_logp_difference/mean": 0.014324167743325233, "step": 1271 }, { "clip_ratio/high_max": 0.007821552455425262, "clip_ratio/high_mean": 0.0025178224314004183, "clip_ratio/low_mean": 0.001339700655080378, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003857523202896118, "entropy": 0.7237152457237244, "epoch": 8.712328767123287, "grad_norm": 1.305755999538586, "kl": 1.198018193244934, "learning_rate": 2.8236301369863015e-07, "loss": -0.0093, "step": 1272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1004.0, "completions/max_terminated_length": 1004.0, "completions/mean_length": 113.00000762939453, "completions/mean_terminated_length": 113.00000762939453, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.5402081608772278, "epoch": 8.719178082191782, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.770829656968077, "kl": 1.23136305809021, "learning_rate": 2.821917808219178e-07, "loss": 0.0857, "num_tokens": 13713083.0, "reward": 0.9381142258644104, "reward_std": 0.05680786073207855, "rewards/check_gptzero_func/mean": 0.9381141662597656, "rewards/check_gptzero_func/std": 0.19467754662036896, "sampling/importance_sampling_ratio/max": 1.3266862630844116, "sampling/importance_sampling_ratio/mean": 1.0000860691070557, "sampling/importance_sampling_ratio/min": 0.7538689970970154, "sampling/sampling_logp_difference/max": 0.2826843857765198, "sampling/sampling_logp_difference/mean": 0.010725976899266243, "step": 1273 }, { "clip_ratio/high_max": 0.017006803303956985, "clip_ratio/high_mean": 0.002944830572232604, "clip_ratio/low_mean": 0.003740864573046565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006685695610940456, "entropy": 0.5405076742172241, "epoch": 8.726027397260275, "grad_norm": 1.5468288548343738, "kl": 1.216382384300232, "learning_rate": 2.8202054794520545e-07, "loss": 0.0774, "step": 1274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 2279.0, "completions/mean_length": 447.3750305175781, "completions/mean_terminated_length": 302.8868103027344, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.8575796484947205, "epoch": 8.732876712328768, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.041296205516471, "kl": 0.5730148553848267, "learning_rate": 2.8184931506849315e-07, "loss": 0.0903, "num_tokens": 13743344.0, "reward": 0.8955116868019104, "reward_std": 0.034925784915685654, "rewards/check_gptzero_func/mean": 0.8955116271972656, "rewards/check_gptzero_func/std": 0.21618609130382538, "sampling/importance_sampling_ratio/max": 1.6467907428741455, "sampling/importance_sampling_ratio/mean": 0.9998248219490051, "sampling/importance_sampling_ratio/min": 0.6164628267288208, "sampling/sampling_logp_difference/max": 0.4988284111022949, "sampling/sampling_logp_difference/mean": 0.018694482743740082, "step": 1275 }, { "clip_ratio/high_max": 0.00826446246355772, "clip_ratio/high_mean": 0.004465098027139902, "clip_ratio/low_mean": 0.0027915318496525288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007256629876792431, "entropy": 0.8581776022911072, "epoch": 8.73972602739726, "grad_norm": 1.1015332693689344, "kl": 0.5679178833961487, "learning_rate": 2.816780821917808e-07, "loss": 0.0848, "step": 1276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 458.0, "completions/mean_length": 141.5178680419922, "completions/mean_terminated_length": 89.54544830322266, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.48438915610313416, "epoch": 8.746575342465754, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3114538158755153, "kl": 0.8987589478492737, "learning_rate": 2.8150684931506845e-07, "loss": 0.2673, "num_tokens": 13756655.0, "reward": 0.9756723642349243, "reward_std": 0.02611350640654564, "rewards/check_gptzero_func/mean": 0.9756723046302795, "rewards/check_gptzero_func/std": 0.09669280797243118, "sampling/importance_sampling_ratio/max": 1.9610785245895386, "sampling/importance_sampling_ratio/mean": 1.0001862049102783, "sampling/importance_sampling_ratio/min": 0.6016572117805481, "sampling/sampling_logp_difference/max": 0.6734945774078369, "sampling/sampling_logp_difference/mean": 0.008605504408478737, "step": 1277 }, { "clip_ratio/high_max": 0.004624277353286743, "clip_ratio/high_mean": 0.000997538329102099, "clip_ratio/low_mean": 0.0016237808158621192, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002621319144964218, "entropy": 0.4850985109806061, "epoch": 8.753424657534246, "grad_norm": 1.2664193957223966, "kl": 0.9060577750205994, "learning_rate": 2.813356164383561e-07, "loss": 0.2631, "step": 1278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2705.0, "completions/max_terminated_length": 2705.0, "completions/mean_length": 263.83929443359375, "completions/mean_terminated_length": 263.83929443359375, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.7827391028404236, "epoch": 8.76027397260274, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4538689225493668, "kl": 0.9586588740348816, "learning_rate": 2.8116438356164386e-07, "loss": 0.0723, "num_tokens": 13776476.0, "reward": 0.9371180534362793, "reward_std": 0.023808438330888748, "rewards/check_gptzero_func/mean": 0.9371179938316345, "rewards/check_gptzero_func/std": 0.13197103142738342, "sampling/importance_sampling_ratio/max": 1.6519525051116943, "sampling/importance_sampling_ratio/mean": 0.9996377229690552, "sampling/importance_sampling_ratio/min": 0.5783451795578003, "sampling/sampling_logp_difference/max": 0.5475844144821167, "sampling/sampling_logp_difference/mean": 0.0165325365960598, "step": 1279 }, { "clip_ratio/high_max": 0.009939651936292648, "clip_ratio/high_mean": 0.0029500662349164486, "clip_ratio/low_mean": 0.0029593787621706724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005909445229917765, "entropy": 0.781844437122345, "epoch": 8.767123287671232, "grad_norm": 1.3445922098206478, "kl": 0.9517207741737366, "learning_rate": 2.809931506849315e-07, "loss": 0.0654, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1742.0, "completions/max_terminated_length": 1742.0, "completions/mean_length": 333.51788330078125, "completions/mean_terminated_length": 333.51788330078125, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.6977454423904419, "epoch": 8.773972602739725, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.3874384349047963, "kl": 0.820027768611908, "learning_rate": 2.8082191780821916e-07, "loss": 0.0354, "num_tokens": 13799841.0, "reward": 0.8893395066261292, "reward_std": 0.03727739304304123, "rewards/check_gptzero_func/mean": 0.8893394470214844, "rewards/check_gptzero_func/std": 0.22899065911769867, "sampling/importance_sampling_ratio/max": 1.5451017618179321, "sampling/importance_sampling_ratio/mean": 0.9997681379318237, "sampling/importance_sampling_ratio/min": 0.6623234748840332, "sampling/sampling_logp_difference/max": 0.4350898265838623, "sampling/sampling_logp_difference/mean": 0.01728481613099575, "step": 1281 }, { "clip_ratio/high_max": 0.007544006686657667, "clip_ratio/high_mean": 0.0035026196856051683, "clip_ratio/low_mean": 0.003006385173648596, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006509005092084408, "entropy": 0.6951259970664978, "epoch": 8.780821917808218, "grad_norm": 1.2950207930629745, "kl": 0.8190164566040039, "learning_rate": 2.8065068493150686e-07, "loss": 0.028, "step": 1282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1667.0, "completions/max_terminated_length": 1667.0, "completions/mean_length": 175.55357360839844, "completions/mean_terminated_length": 175.55357360839844, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.5345529913902283, "epoch": 8.787671232876713, "frac_reward_zero_std": 0.1071428656578064, "grad_norm": 1.705853931275467, "kl": 1.0439223051071167, "learning_rate": 2.804794520547945e-07, "loss": 0.0134, "num_tokens": 13814370.0, "reward": 0.9522179365158081, "reward_std": 0.009760337881743908, "rewards/check_gptzero_func/mean": 0.9522179961204529, "rewards/check_gptzero_func/std": 0.13749082386493683, "sampling/importance_sampling_ratio/max": 1.4091317653656006, "sampling/importance_sampling_ratio/mean": 0.999950110912323, "sampling/importance_sampling_ratio/min": 0.6210192441940308, "sampling/sampling_logp_difference/max": 0.4763932228088379, "sampling/sampling_logp_difference/mean": 0.01153953280299902, "step": 1283 }, { "clip_ratio/high_max": 0.012908541597425938, "clip_ratio/high_mean": 0.0042977482080459595, "clip_ratio/low_mean": 0.0027444779407233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007042226381599903, "entropy": 0.5336617231369019, "epoch": 8.794520547945206, "grad_norm": 1.4822933536630525, "kl": 1.0448194742202759, "learning_rate": 2.8030821917808216e-07, "loss": 0.0053, "step": 1284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2607.0, "completions/mean_length": 332.39288330078125, "completions/mean_terminated_length": 283.8908996582031, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.628317654132843, "epoch": 8.801369863013699, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 2.0334000975398707, "kl": 1.0087538957595825, "learning_rate": 2.801369863013698e-07, "loss": 0.02, "num_tokens": 13838118.0, "reward": 0.8889856338500977, "reward_std": 0.060845378786325455, "rewards/check_gptzero_func/mean": 0.8889855742454529, "rewards/check_gptzero_func/std": 0.2470438927412033, "sampling/importance_sampling_ratio/max": 1.7065176963806152, "sampling/importance_sampling_ratio/mean": 1.000117301940918, "sampling/importance_sampling_ratio/min": 0.68808513879776, "sampling/sampling_logp_difference/max": 0.5344548225402832, "sampling/sampling_logp_difference/mean": 0.012628275901079178, "step": 1285 }, { "clip_ratio/high_max": 0.006641509477049112, "clip_ratio/high_mean": 0.002029869006946683, "clip_ratio/low_mean": 0.000768402882385999, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027982720639556646, "entropy": 0.6305088400840759, "epoch": 8.808219178082192, "grad_norm": 1.220974704049861, "kl": 0.8357948660850525, "learning_rate": 2.799657534246575e-07, "loss": 0.0141, "step": 1286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1847.0, "completions/mean_length": 315.625, "completions/mean_terminated_length": 216.20370483398438, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.5754237771034241, "epoch": 8.815068493150685, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4943279727598602, "kl": 0.769507884979248, "learning_rate": 2.7979452054794517e-07, "loss": 0.0934, "num_tokens": 13861313.0, "reward": 0.9070937633514404, "reward_std": 0.03370615839958191, "rewards/check_gptzero_func/mean": 0.9070938229560852, "rewards/check_gptzero_func/std": 0.23507675528526306, "sampling/importance_sampling_ratio/max": 1.3477954864501953, "sampling/importance_sampling_ratio/mean": 0.9991640448570251, "sampling/importance_sampling_ratio/min": 0.6937140226364136, "sampling/sampling_logp_difference/max": 0.3656954765319824, "sampling/sampling_logp_difference/mean": 0.013015177100896835, "step": 1287 }, { "clip_ratio/high_max": 0.010459469631314278, "clip_ratio/high_mean": 0.0023541352711617947, "clip_ratio/low_mean": 0.0015437511028721929, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038978862576186657, "entropy": 0.5773834586143494, "epoch": 8.821917808219178, "grad_norm": 0.9684400105457656, "kl": 0.7660925984382629, "learning_rate": 2.7962328767123287e-07, "loss": 0.0893, "step": 1288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 314.71429443359375, "completions/mean_terminated_length": 215.25926208496094, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.568414568901062, "epoch": 8.82876712328767, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.1371902442679909, "kl": 0.7505950927734375, "learning_rate": 2.794520547945206e-07, "loss": 0.2055, "num_tokens": 13883791.0, "reward": 0.898358941078186, "reward_std": 0.050846319645643234, "rewards/check_gptzero_func/mean": 0.8983588814735413, "rewards/check_gptzero_func/std": 0.21047449111938477, "sampling/importance_sampling_ratio/max": 1.4280080795288086, "sampling/importance_sampling_ratio/mean": 1.0005595684051514, "sampling/importance_sampling_ratio/min": 0.6848291754722595, "sampling/sampling_logp_difference/max": 0.3785858154296875, "sampling/sampling_logp_difference/mean": 0.012983540073037148, "step": 1289 }, { "clip_ratio/high_max": 0.012673035264015198, "clip_ratio/high_mean": 0.004083934705704451, "clip_ratio/low_mean": 0.0038953174371272326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007979252375662327, "entropy": 0.5679516792297363, "epoch": 8.835616438356164, "grad_norm": 1.0432742337592744, "kl": 0.7523034811019897, "learning_rate": 2.792808219178082e-07, "loss": 0.1996, "step": 1290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1623.0, "completions/mean_length": 332.8571472167969, "completions/mean_terminated_length": 234.07408142089844, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.735814094543457, "epoch": 8.842465753424658, "frac_reward_zero_std": 0.1428571492433548, "grad_norm": 1.1245650588848444, "kl": 0.9552993774414062, "learning_rate": 2.791095890410959e-07, "loss": 0.1418, "num_tokens": 13907051.0, "reward": 0.8627405166625977, "reward_std": 0.036212291568517685, "rewards/check_gptzero_func/mean": 0.8627404570579529, "rewards/check_gptzero_func/std": 0.2829969823360443, "sampling/importance_sampling_ratio/max": 1.4430662393569946, "sampling/importance_sampling_ratio/mean": 1.0001543760299683, "sampling/importance_sampling_ratio/min": 0.6798682808876038, "sampling/sampling_logp_difference/max": 0.38585615158081055, "sampling/sampling_logp_difference/mean": 0.016789566725492477, "step": 1291 }, { "clip_ratio/high_max": 0.00805910024791956, "clip_ratio/high_mean": 0.002878190716728568, "clip_ratio/low_mean": 0.0037280279211699963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006606218405067921, "entropy": 0.7365494966506958, "epoch": 8.849315068493151, "grad_norm": 1.0586174219752529, "kl": 0.9516364932060242, "learning_rate": 2.789383561643835e-07, "loss": 0.1362, "step": 1292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1661.0, "completions/mean_length": 409.8750305175781, "completions/mean_terminated_length": 362.78179931640625, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.9322599768638611, "epoch": 8.856164383561644, "frac_reward_zero_std": 0.0, "grad_norm": 1.3622708197406683, "kl": 0.769572913646698, "learning_rate": 2.7876712328767123e-07, "loss": -0.0715, "num_tokens": 13935018.0, "reward": 0.8478687405586243, "reward_std": 0.0713612362742424, "rewards/check_gptzero_func/mean": 0.8478687405586243, "rewards/check_gptzero_func/std": 0.2625647783279419, "sampling/importance_sampling_ratio/max": 1.4353797435760498, "sampling/importance_sampling_ratio/mean": 1.0000823736190796, "sampling/importance_sampling_ratio/min": 0.6354941129684448, "sampling/sampling_logp_difference/max": 0.4533524513244629, "sampling/sampling_logp_difference/mean": 0.01985255442559719, "step": 1293 }, { "clip_ratio/high_max": 0.006224066484719515, "clip_ratio/high_mean": 0.00301370513625443, "clip_ratio/low_mean": 0.002155254129320383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0051689594984054565, "entropy": 0.9345095753669739, "epoch": 8.863013698630137, "grad_norm": 1.119548037607647, "kl": 0.7675517201423645, "learning_rate": 2.785958904109589e-07, "loss": -0.0772, "step": 1294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2231.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 180.80357360839844, "completions/mean_terminated_length": 180.80357360839844, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.5754521489143372, "epoch": 8.86986301369863, "frac_reward_zero_std": 0.0, "grad_norm": 1.7244712485854963, "kl": 1.1635611057281494, "learning_rate": 2.7842465753424653e-07, "loss": 0.2003, "num_tokens": 13949869.0, "reward": 0.9077702760696411, "reward_std": 0.10887166857719421, "rewards/check_gptzero_func/mean": 0.9077702164649963, "rewards/check_gptzero_func/std": 0.230619877576828, "sampling/importance_sampling_ratio/max": 1.3128823041915894, "sampling/importance_sampling_ratio/mean": 0.9998655319213867, "sampling/importance_sampling_ratio/min": 0.7128965854644775, "sampling/sampling_logp_difference/max": 0.33841896057128906, "sampling/sampling_logp_difference/mean": 0.01353093795478344, "step": 1295 }, { "clip_ratio/high_max": 0.009326424449682236, "clip_ratio/high_mean": 0.0017970604822039604, "clip_ratio/low_mean": 0.0035466982517391443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005343758501112461, "entropy": 0.5791624784469604, "epoch": 8.876712328767123, "grad_norm": 1.5699533065119087, "kl": 1.1576741933822632, "learning_rate": 2.7825342465753423e-07, "loss": 0.1922, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1685.0, "completions/mean_length": 363.1071472167969, "completions/mean_terminated_length": 265.4444580078125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.8016089797019958, "epoch": 8.883561643835616, "frac_reward_zero_std": 0.0, "grad_norm": 1.112412320935853, "kl": 0.7525261640548706, "learning_rate": 2.7808219178082194e-07, "loss": 0.3075, "num_tokens": 13975919.0, "reward": 0.9138785004615784, "reward_std": 0.07286016643047333, "rewards/check_gptzero_func/mean": 0.9138784408569336, "rewards/check_gptzero_func/std": 0.17111386358737946, "sampling/importance_sampling_ratio/max": 1.4130834341049194, "sampling/importance_sampling_ratio/mean": 1.0001343488693237, "sampling/importance_sampling_ratio/min": 0.6442108154296875, "sampling/sampling_logp_difference/max": 0.4397292137145996, "sampling/sampling_logp_difference/mean": 0.0154477059841156, "step": 1297 }, { "clip_ratio/high_max": 0.0055601890198886395, "clip_ratio/high_mean": 0.0015855096280574799, "clip_ratio/low_mean": 0.002329402370378375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0039149122312664986, "entropy": 0.8013476133346558, "epoch": 8.89041095890411, "grad_norm": 1.052915836268492, "kl": 0.7497795224189758, "learning_rate": 2.779109589041096e-07, "loss": 0.3017, "step": 1298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1631.0, "completions/mean_length": 287.1785888671875, "completions/mean_terminated_length": 186.70370483398438, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.49363255500793457, "epoch": 8.897260273972602, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5011595360463872, "kl": 1.111739993095398, "learning_rate": 2.7773972602739724e-07, "loss": 0.1224, "num_tokens": 13996577.0, "reward": 0.9115732312202454, "reward_std": 0.07399317622184753, "rewards/check_gptzero_func/mean": 0.9115732908248901, "rewards/check_gptzero_func/std": 0.2379985898733139, "sampling/importance_sampling_ratio/max": 1.3983417749404907, "sampling/importance_sampling_ratio/mean": 1.0004041194915771, "sampling/importance_sampling_ratio/min": 0.5435425639152527, "sampling/sampling_logp_difference/max": 0.609647274017334, "sampling/sampling_logp_difference/mean": 0.011631953530013561, "step": 1299 }, { "clip_ratio/high_max": 0.008934480138123035, "clip_ratio/high_mean": 0.002755336696282029, "clip_ratio/low_mean": 0.0016321329167112708, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004387469496577978, "entropy": 0.49742838740348816, "epoch": 8.904109589041095, "grad_norm": 1.1377000775420516, "kl": 1.088383436203003, "learning_rate": 2.7756849315068494e-07, "loss": 0.1161, "step": 1300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1601.0, "completions/max_terminated_length": 1601.0, "completions/mean_length": 169.85714721679688, "completions/mean_terminated_length": 169.85714721679688, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.744137167930603, "epoch": 8.91095890410959, "frac_reward_zero_std": 0.0, "grad_norm": 1.7098710016121634, "kl": 0.9732649922370911, "learning_rate": 2.773972602739726e-07, "loss": 0.0292, "num_tokens": 14010857.0, "reward": 0.9348790645599365, "reward_std": 0.03217782452702522, "rewards/check_gptzero_func/mean": 0.9348790049552917, "rewards/check_gptzero_func/std": 0.15721550583839417, "sampling/importance_sampling_ratio/max": 1.4384044408798218, "sampling/importance_sampling_ratio/mean": 1.0005511045455933, "sampling/importance_sampling_ratio/min": 0.6368674039840698, "sampling/sampling_logp_difference/max": 0.45119380950927734, "sampling/sampling_logp_difference/mean": 0.013418096117675304, "step": 1301 }, { "clip_ratio/high_max": 0.010629921220242977, "clip_ratio/high_mean": 0.003446349874138832, "clip_ratio/low_mean": 0.0027030573692172766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006149407476186752, "entropy": 0.7441068291664124, "epoch": 8.917808219178083, "grad_norm": 1.5395430179494793, "kl": 0.97321617603302, "learning_rate": 2.7722602739726024e-07, "loss": 0.0205, "step": 1302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1969.0, "completions/mean_length": 241.37501525878906, "completions/mean_terminated_length": 191.21817016601562, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.8158628344535828, "epoch": 8.924657534246576, "frac_reward_zero_std": 0.0, "grad_norm": 1.303308260282464, "kl": 0.9895481467247009, "learning_rate": 2.7705479452054794e-07, "loss": 0.3451, "num_tokens": 14029546.0, "reward": 0.951733410358429, "reward_std": 0.03616320714354515, "rewards/check_gptzero_func/mean": 0.951733410358429, "rewards/check_gptzero_func/std": 0.16142132878303528, "sampling/importance_sampling_ratio/max": 1.327413558959961, "sampling/importance_sampling_ratio/mean": 0.9999417066574097, "sampling/importance_sampling_ratio/min": 0.7366152405738831, "sampling/sampling_logp_difference/max": 0.30568957328796387, "sampling/sampling_logp_difference/mean": 0.01551980059593916, "step": 1303 }, { "clip_ratio/high_max": 0.008990110829472542, "clip_ratio/high_mean": 0.0018046458717435598, "clip_ratio/low_mean": 0.0021897386759519577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003994384314864874, "entropy": 0.8191741108894348, "epoch": 8.931506849315069, "grad_norm": 1.2294737238021376, "kl": 0.9651101231575012, "learning_rate": 2.768835616438356e-07, "loss": 0.3384, "step": 1304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 210.46429443359375, "completions/mean_terminated_length": 159.74545288085938, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.5996495485305786, "epoch": 8.938356164383562, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4556251497258468, "kl": 1.1532975435256958, "learning_rate": 2.7671232876712325e-07, "loss": 0.2012, "num_tokens": 14045918.0, "reward": 0.9379947185516357, "reward_std": 0.01973533257842064, "rewards/check_gptzero_func/mean": 0.937994658946991, "rewards/check_gptzero_func/std": 0.15303745865821838, "sampling/importance_sampling_ratio/max": 1.4055391550064087, "sampling/importance_sampling_ratio/mean": 0.9996622204780579, "sampling/importance_sampling_ratio/min": 0.5790150761604309, "sampling/sampling_logp_difference/max": 0.5464267730712891, "sampling/sampling_logp_difference/mean": 0.011646286584436893, "step": 1305 }, { "clip_ratio/high_max": 0.011535688303411007, "clip_ratio/high_mean": 0.0016479555051773787, "clip_ratio/low_mean": 0.0011571563081815839, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028051116969436407, "entropy": 0.6008380055427551, "epoch": 8.945205479452055, "grad_norm": 1.3354594357928014, "kl": 1.1467159986495972, "learning_rate": 2.7654109589041095e-07, "loss": 0.194, "step": 1306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1065.0, "completions/max_terminated_length": 1065.0, "completions/mean_length": 111.00000762939453, "completions/mean_terminated_length": 111.00000762939453, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.6190610527992249, "epoch": 8.952054794520548, "frac_reward_zero_std": 0.0, "grad_norm": 1.9006539085713976, "kl": 1.1299322843551636, "learning_rate": 2.7636986301369865e-07, "loss": 0.1676, "num_tokens": 14056838.0, "reward": 0.9504759311676025, "reward_std": 0.04836220294237137, "rewards/check_gptzero_func/mean": 0.9504758715629578, "rewards/check_gptzero_func/std": 0.159490704536438, "sampling/importance_sampling_ratio/max": 1.264744520187378, "sampling/importance_sampling_ratio/mean": 0.9995831251144409, "sampling/importance_sampling_ratio/min": 0.7767338156700134, "sampling/sampling_logp_difference/max": 0.2526576519012451, "sampling/sampling_logp_difference/mean": 0.012346767820417881, "step": 1307 }, { "clip_ratio/high_max": 0.013278855942189693, "clip_ratio/high_mean": 0.0023001173976808786, "clip_ratio/low_mean": 0.004194773733615875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0064948913641273975, "entropy": 0.6216015219688416, "epoch": 8.95890410958904, "grad_norm": 1.6289172937252838, "kl": 1.1292498111724854, "learning_rate": 2.761986301369863e-07, "loss": 0.1592, "step": 1308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1051.0, "completions/max_terminated_length": 1051.0, "completions/mean_length": 111.85714721679688, "completions/mean_terminated_length": 111.85714721679688, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.8028931021690369, "epoch": 8.965753424657533, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4783701256789012, "kl": 1.3062341213226318, "learning_rate": 2.7602739726027395e-07, "loss": 0.1891, "num_tokens": 14068260.0, "reward": 0.9264717698097229, "reward_std": 0.0944739356637001, "rewards/check_gptzero_func/mean": 0.9264717102050781, "rewards/check_gptzero_func/std": 0.19288292527198792, "sampling/importance_sampling_ratio/max": 1.2730647325515747, "sampling/importance_sampling_ratio/mean": 1.0000032186508179, "sampling/importance_sampling_ratio/min": 0.7996136546134949, "sampling/sampling_logp_difference/max": 0.24142718315124512, "sampling/sampling_logp_difference/mean": 0.012967913411557674, "step": 1309 }, { "clip_ratio/high_max": 0.0023752970155328512, "clip_ratio/high_mean": 0.0010767905041575432, "clip_ratio/low_mean": 0.0031266321893781424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004203422926366329, "entropy": 0.8035345077514648, "epoch": 8.972602739726028, "grad_norm": 1.3935225728552614, "kl": 1.2948964834213257, "learning_rate": 2.758561643835616e-07, "loss": 0.1829, "step": 1310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1974.0, "completions/mean_length": 242.85714721679688, "completions/mean_terminated_length": 192.72726440429688, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.6337499618530273, "epoch": 8.979452054794521, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.409746447479282, "kl": 0.9050299525260925, "learning_rate": 2.756849315068493e-07, "loss": 0.161, "num_tokens": 14087382.0, "reward": 0.9405207633972168, "reward_std": 0.03570374473929405, "rewards/check_gptzero_func/mean": 0.940520703792572, "rewards/check_gptzero_func/std": 0.13726352155208588, "sampling/importance_sampling_ratio/max": 1.350054383277893, "sampling/importance_sampling_ratio/mean": 1.0002877712249756, "sampling/importance_sampling_ratio/min": 0.6985158920288086, "sampling/sampling_logp_difference/max": 0.3587973117828369, "sampling/sampling_logp_difference/mean": 0.013151870109140873, "step": 1311 }, { "clip_ratio/high_max": 0.011113982647657394, "clip_ratio/high_mean": 0.0034416760317981243, "clip_ratio/low_mean": 0.0021435555536299944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005585231352597475, "entropy": 0.6353228688240051, "epoch": 8.986301369863014, "grad_norm": 1.2873517862932835, "kl": 0.9032474160194397, "learning_rate": 2.7551369863013696e-07, "loss": 0.1539, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1615.0, "completions/mean_length": 189.19644165039062, "completions/mean_terminated_length": 138.09091186523438, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.76139897108078, "epoch": 8.993150684931507, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.560871278324428, "kl": 1.2392120361328125, "learning_rate": 2.753424657534246e-07, "loss": 0.2113, "num_tokens": 14102615.0, "reward": 0.9240757822990417, "reward_std": 0.052222393453121185, "rewards/check_gptzero_func/mean": 0.924075722694397, "rewards/check_gptzero_func/std": 0.19920514523983002, "sampling/importance_sampling_ratio/max": 1.4582148790359497, "sampling/importance_sampling_ratio/mean": 1.0002816915512085, "sampling/importance_sampling_ratio/min": 0.6699788570404053, "sampling/sampling_logp_difference/max": 0.4005091190338135, "sampling/sampling_logp_difference/mean": 0.014643062837421894, "step": 1313 }, { "clip_ratio/high_max": 0.010478617623448372, "clip_ratio/high_mean": 0.0023960371036082506, "clip_ratio/low_mean": 0.0027232500724494457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005119286477565765, "entropy": 0.7609777450561523, "epoch": 9.0, "grad_norm": 1.419537486650697, "kl": 1.2256505489349365, "learning_rate": 2.751712328767123e-07, "loss": 0.204, "step": 1314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1918.0, "completions/max_terminated_length": 1918.0, "completions/mean_length": 192.71429443359375, "completions/mean_terminated_length": 192.71429443359375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.4083692133426666, "epoch": 9.006849315068493, "frac_reward_zero_std": 0.0, "grad_norm": 1.6205222595271176, "kl": 1.2484112977981567, "learning_rate": 2.75e-07, "loss": 0.015, "num_tokens": 14118423.0, "reward": 0.9175766110420227, "reward_std": 0.022818194702267647, "rewards/check_gptzero_func/mean": 0.9175766110420227, "rewards/check_gptzero_func/std": 0.19597479701042175, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9996533989906311, "sampling/importance_sampling_ratio/min": 0.7279982566833496, "sampling/sampling_logp_difference/max": 0.7067887783050537, "sampling/sampling_logp_difference/mean": 0.009229114279150963, "step": 1315 }, { "clip_ratio/high_max": 0.008652657270431519, "clip_ratio/high_mean": 0.002614285098388791, "clip_ratio/low_mean": 0.0017244609771296382, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0043387459591031075, "entropy": 0.4109551012516022, "epoch": 9.013698630136986, "grad_norm": 1.445360899091704, "kl": 1.248154640197754, "learning_rate": 2.7482876712328766e-07, "loss": 0.0073, "step": 1316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 2005.0, "completions/mean_length": 447.58929443359375, "completions/mean_terminated_length": 353.0555725097656, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 0.7771965861320496, "epoch": 9.020547945205479, "frac_reward_zero_std": 0.0, "grad_norm": 1.0750117104340877, "kl": 0.5009194016456604, "learning_rate": 2.746575342465753e-07, "loss": -0.0186, "num_tokens": 14148196.0, "reward": 0.9078214168548584, "reward_std": 0.01908981427550316, "rewards/check_gptzero_func/mean": 0.9078213572502136, "rewards/check_gptzero_func/std": 0.18322275578975677, "sampling/importance_sampling_ratio/max": 1.4117172956466675, "sampling/importance_sampling_ratio/mean": 1.0002485513687134, "sampling/importance_sampling_ratio/min": 0.7006928324699402, "sampling/sampling_logp_difference/max": 0.3556857109069824, "sampling/sampling_logp_difference/mean": 0.018795054405927658, "step": 1317 }, { "clip_ratio/high_max": 0.007952974177896976, "clip_ratio/high_mean": 0.004449959844350815, "clip_ratio/low_mean": 0.00238814577460289, "clip_ratio/low_min": 0.0002540005079936236, "clip_ratio/region_mean": 0.006838105618953705, "entropy": 0.7765244245529175, "epoch": 9.027397260273972, "grad_norm": 1.017864624037629, "kl": 0.49954304099082947, "learning_rate": 2.74486301369863e-07, "loss": -0.0242, "step": 1318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2087.0, "completions/mean_length": 193.73214721679688, "completions/mean_terminated_length": 142.70909118652344, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.7395788431167603, "epoch": 9.034246575342467, "frac_reward_zero_std": 0.0, "grad_norm": 1.1347515891422537, "kl": 0.9595162272453308, "learning_rate": 2.7431506849315067e-07, "loss": 0.2694, "num_tokens": 14163687.0, "reward": 0.9785687923431396, "reward_std": 0.010717827826738358, "rewards/check_gptzero_func/mean": 0.9785687327384949, "rewards/check_gptzero_func/std": 0.07291574776172638, "sampling/importance_sampling_ratio/max": 1.395779013633728, "sampling/importance_sampling_ratio/mean": 0.9995768666267395, "sampling/importance_sampling_ratio/min": 0.6898588538169861, "sampling/sampling_logp_difference/max": 0.37126827239990234, "sampling/sampling_logp_difference/mean": 0.013090623542666435, "step": 1319 }, { "clip_ratio/high_max": 0.007499999832361937, "clip_ratio/high_mean": 0.0017013510223478079, "clip_ratio/low_mean": 0.002281057881191373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003982408903539181, "entropy": 0.7398566007614136, "epoch": 9.04109589041096, "grad_norm": 1.0661560191693265, "kl": 0.9584660530090332, "learning_rate": 2.741438356164383e-07, "loss": 0.2644, "step": 1320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1868.0, "completions/mean_length": 210.3928680419922, "completions/mean_terminated_length": 159.6727294921875, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.42754438519477844, "epoch": 9.047945205479452, "frac_reward_zero_std": 0.0, "grad_norm": 1.0776604883511467, "kl": 0.9465128779411316, "learning_rate": 2.73972602739726e-07, "loss": 0.1317, "num_tokens": 14180613.0, "reward": 0.9593451023101807, "reward_std": 0.02144494280219078, "rewards/check_gptzero_func/mean": 0.9593450427055359, "rewards/check_gptzero_func/std": 0.1343361884355545, "sampling/importance_sampling_ratio/max": 1.3318825960159302, "sampling/importance_sampling_ratio/mean": 1.0000628232955933, "sampling/importance_sampling_ratio/min": 0.6359285116195679, "sampling/sampling_logp_difference/max": 0.4526691436767578, "sampling/sampling_logp_difference/mean": 0.00973956473171711, "step": 1321 }, { "clip_ratio/high_max": 0.011784995906054974, "clip_ratio/high_mean": 0.002563792048022151, "clip_ratio/low_mean": 0.0028738160617649555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00543760834261775, "entropy": 0.4280490577220917, "epoch": 9.054794520547945, "grad_norm": 0.9645943304724401, "kl": 0.9469738006591797, "learning_rate": 2.7380136986301367e-07, "loss": 0.1266, "step": 1322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1736.0, "completions/mean_length": 259.5, "completions/mean_terminated_length": 158.0, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 0.9564574956893921, "epoch": 9.061643835616438, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5636766181596855, "kl": 0.9857200980186462, "learning_rate": 2.736301369863013e-07, "loss": 0.2614, "num_tokens": 14199715.0, "reward": 0.9118990898132324, "reward_std": 0.06728176772594452, "rewards/check_gptzero_func/mean": 0.9118990302085876, "rewards/check_gptzero_func/std": 0.22496023774147034, "sampling/importance_sampling_ratio/max": 1.2876616716384888, "sampling/importance_sampling_ratio/mean": 1.000030755996704, "sampling/importance_sampling_ratio/min": 0.6403866410255432, "sampling/sampling_logp_difference/max": 0.44568324089050293, "sampling/sampling_logp_difference/mean": 0.01718759909272194, "step": 1323 }, { "clip_ratio/high_max": 0.007430582772940397, "clip_ratio/high_mean": 0.0016370804514735937, "clip_ratio/low_mean": 0.005055212415754795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00669229356572032, "entropy": 0.9569153785705566, "epoch": 9.068493150684931, "grad_norm": 1.3574317886164318, "kl": 0.9781011939048767, "learning_rate": 2.73458904109589e-07, "loss": 0.2538, "step": 1324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2138.0, "completions/max_terminated_length": 2138.0, "completions/mean_length": 233.3928680419922, "completions/mean_terminated_length": 233.3928680419922, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.9883078336715698, "epoch": 9.075342465753424, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.564668117604746, "kl": 1.049075961112976, "learning_rate": 2.7328767123287673e-07, "loss": 0.1497, "num_tokens": 14217353.0, "reward": 0.8704267144203186, "reward_std": 0.041133780032396317, "rewards/check_gptzero_func/mean": 0.8704266548156738, "rewards/check_gptzero_func/std": 0.2722349762916565, "sampling/importance_sampling_ratio/max": 1.389060139656067, "sampling/importance_sampling_ratio/mean": 0.9998343586921692, "sampling/importance_sampling_ratio/min": 0.7121772766113281, "sampling/sampling_logp_difference/max": 0.3394284248352051, "sampling/sampling_logp_difference/mean": 0.01843983680009842, "step": 1325 }, { "clip_ratio/high_max": 0.0033789791632443666, "clip_ratio/high_mean": 0.0015655463794246316, "clip_ratio/low_mean": 0.002291150623932481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003856697352603078, "entropy": 0.9858840107917786, "epoch": 9.082191780821917, "grad_norm": 1.493376989484439, "kl": 1.046990156173706, "learning_rate": 2.731164383561644e-07, "loss": 0.1422, "step": 1326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1988.0, "completions/mean_length": 293.2321472167969, "completions/mean_terminated_length": 244.01817321777344, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 0.9891599416732788, "epoch": 9.08904109589041, "frac_reward_zero_std": 0.0, "grad_norm": 1.4229777923224511, "kl": 0.8773381114006042, "learning_rate": 2.7294520547945203e-07, "loss": 0.0582, "num_tokens": 14238372.0, "reward": 0.9059511423110962, "reward_std": 0.02460992895066738, "rewards/check_gptzero_func/mean": 0.9059510827064514, "rewards/check_gptzero_func/std": 0.20914329588413239, "sampling/importance_sampling_ratio/max": 1.498001217842102, "sampling/importance_sampling_ratio/mean": 0.9997128248214722, "sampling/importance_sampling_ratio/min": 0.4710952043533325, "sampling/sampling_logp_difference/max": 0.7526950836181641, "sampling/sampling_logp_difference/mean": 0.019799549132585526, "step": 1327 }, { "clip_ratio/high_max": 0.011387163773179054, "clip_ratio/high_mean": 0.004626773763448, "clip_ratio/low_mean": 0.0016210267785936594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006247800309211016, "entropy": 0.9891820549964905, "epoch": 9.095890410958905, "grad_norm": 1.3474662730735674, "kl": 0.8780456781387329, "learning_rate": 2.7277397260273973e-07, "loss": 0.0509, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2376.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 268.4464416503906, "completions/mean_terminated_length": 268.4464416503906, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.6907389760017395, "epoch": 9.102739726027398, "frac_reward_zero_std": 0.0, "grad_norm": 1.4543186439249627, "kl": 0.9969691634178162, "learning_rate": 2.726027397260274e-07, "loss": -0.0043, "num_tokens": 14258807.0, "reward": 0.8967463970184326, "reward_std": 0.03301984444260597, "rewards/check_gptzero_func/mean": 0.8967463374137878, "rewards/check_gptzero_func/std": 0.23308789730072021, "sampling/importance_sampling_ratio/max": 1.3542896509170532, "sampling/importance_sampling_ratio/mean": 1.0001804828643799, "sampling/importance_sampling_ratio/min": 0.6307835578918457, "sampling/sampling_logp_difference/max": 0.46079254150390625, "sampling/sampling_logp_difference/mean": 0.014143060892820358, "step": 1329 }, { "clip_ratio/high_max": 0.006348758470267057, "clip_ratio/high_mean": 0.0028963088989257812, "clip_ratio/low_mean": 0.001630560727789998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004526869393885136, "entropy": 0.6943153738975525, "epoch": 9.10958904109589, "grad_norm": 1.3627628865891317, "kl": 0.9972198605537415, "learning_rate": 2.7243150684931503e-07, "loss": -0.0121, "step": 1330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1653.0, "completions/mean_length": 390.7321472167969, "completions/mean_terminated_length": 294.09259033203125, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.8652942776679993, "epoch": 9.116438356164384, "frac_reward_zero_std": 0.0, "grad_norm": 1.2377987652136102, "kl": 1.0006967782974243, "learning_rate": 2.722602739726027e-07, "loss": 0.2028, "num_tokens": 14285028.0, "reward": 0.861588716506958, "reward_std": 0.053710609674453735, "rewards/check_gptzero_func/mean": 0.8615886569023132, "rewards/check_gptzero_func/std": 0.2695215344429016, "sampling/importance_sampling_ratio/max": 1.457740068435669, "sampling/importance_sampling_ratio/mean": 1.0003972053527832, "sampling/importance_sampling_ratio/min": 0.5243854522705078, "sampling/sampling_logp_difference/max": 0.6455283164978027, "sampling/sampling_logp_difference/mean": 0.018928494304418564, "step": 1331 }, { "clip_ratio/high_max": 0.007988380268216133, "clip_ratio/high_mean": 0.004054344724863768, "clip_ratio/low_mean": 0.0018787457374855876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005933091044425964, "entropy": 0.8630115389823914, "epoch": 9.123287671232877, "grad_norm": 1.1626686095351577, "kl": 0.9991026520729065, "learning_rate": 2.7208904109589044e-07, "loss": 0.1968, "step": 1332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2091.0, "completions/max_terminated_length": 2091.0, "completions/mean_length": 347.89288330078125, "completions/mean_terminated_length": 347.89288330078125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.7116333842277527, "epoch": 9.13013698630137, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.4371092495909852, "kl": 0.8947153091430664, "learning_rate": 2.719178082191781e-07, "loss": 0.0604, "num_tokens": 14309314.0, "reward": 0.8880503177642822, "reward_std": 0.03625425323843956, "rewards/check_gptzero_func/mean": 0.8880502581596375, "rewards/check_gptzero_func/std": 0.1975230872631073, "sampling/importance_sampling_ratio/max": 1.8238537311553955, "sampling/importance_sampling_ratio/mean": 0.9999682307243347, "sampling/importance_sampling_ratio/min": 0.6538617610931396, "sampling/sampling_logp_difference/max": 0.6009516716003418, "sampling/sampling_logp_difference/mean": 0.018224472180008888, "step": 1333 }, { "clip_ratio/high_max": 0.00730542279779911, "clip_ratio/high_mean": 0.0038034303579479456, "clip_ratio/low_mean": 0.0029456859920173883, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006749116815626621, "entropy": 0.7126561999320984, "epoch": 9.136986301369863, "grad_norm": 1.2894930770584754, "kl": 0.8624057769775391, "learning_rate": 2.7174657534246574e-07, "loss": 0.053, "step": 1334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1874.0, "completions/max_terminated_length": 1874.0, "completions/mean_length": 175.82144165039062, "completions/mean_terminated_length": 175.82144165039062, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.8111650347709656, "epoch": 9.143835616438356, "frac_reward_zero_std": 0.0, "grad_norm": 1.7834125095188218, "kl": 1.2117338180541992, "learning_rate": 2.7157534246575345e-07, "loss": 0.0851, "num_tokens": 14323824.0, "reward": 0.9433720111846924, "reward_std": 0.05739504471421242, "rewards/check_gptzero_func/mean": 0.9433719515800476, "rewards/check_gptzero_func/std": 0.15203337371349335, "sampling/importance_sampling_ratio/max": 1.4596668481826782, "sampling/importance_sampling_ratio/mean": 1.000581979751587, "sampling/importance_sampling_ratio/min": 0.6685027480125427, "sampling/sampling_logp_difference/max": 0.40271472930908203, "sampling/sampling_logp_difference/mean": 0.014237153343856335, "step": 1335 }, { "clip_ratio/high_max": 0.013269905000925064, "clip_ratio/high_mean": 0.002504148054867983, "clip_ratio/low_mean": 0.0017995965899899602, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004303744528442621, "entropy": 0.8152590990066528, "epoch": 9.150684931506849, "grad_norm": 1.6274889397282732, "kl": 1.2005010843276978, "learning_rate": 2.714041095890411e-07, "loss": 0.077, "step": 1336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 294.8214416503906, "completions/mean_terminated_length": 194.629638671875, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 1.062503457069397, "epoch": 9.157534246575343, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.1951742250607442, "kl": 0.9238007664680481, "learning_rate": 2.7123287671232875e-07, "loss": -0.0008, "num_tokens": 14345690.0, "reward": 0.9557597041130066, "reward_std": 0.040839385241270065, "rewards/check_gptzero_func/mean": 0.9557596445083618, "rewards/check_gptzero_func/std": 0.1525745540857315, "sampling/importance_sampling_ratio/max": 1.509300708770752, "sampling/importance_sampling_ratio/mean": 0.9998393654823303, "sampling/importance_sampling_ratio/min": 0.7358543276786804, "sampling/sampling_logp_difference/max": 0.41164636611938477, "sampling/sampling_logp_difference/mean": 0.017197906970977783, "step": 1337 }, { "clip_ratio/high_max": 0.007966747507452965, "clip_ratio/high_mean": 0.0016181744867935777, "clip_ratio/low_mean": 0.0014948701718822122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00311304465867579, "entropy": 1.064629316329956, "epoch": 9.164383561643836, "grad_norm": 1.1364824474949677, "kl": 0.9240192770957947, "learning_rate": 2.710616438356164e-07, "loss": -0.0065, "step": 1338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1560.0, "completions/max_terminated_length": 1560.0, "completions/mean_length": 188.48214721679688, "completions/mean_terminated_length": 188.48214721679688, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.5660509467124939, "epoch": 9.17123287671233, "frac_reward_zero_std": 0.0, "grad_norm": 1.6054104214092122, "kl": 0.8815112709999084, "learning_rate": 2.708904109589041e-07, "loss": 0.1491, "num_tokens": 14361927.0, "reward": 0.9516738653182983, "reward_std": 0.024237599223852158, "rewards/check_gptzero_func/mean": 0.9516738057136536, "rewards/check_gptzero_func/std": 0.1511753648519516, "sampling/importance_sampling_ratio/max": 1.2835406064987183, "sampling/importance_sampling_ratio/mean": 0.9995487928390503, "sampling/importance_sampling_ratio/min": 0.6900789141654968, "sampling/sampling_logp_difference/max": 0.37094926834106445, "sampling/sampling_logp_difference/mean": 0.011515771970152855, "step": 1339 }, { "clip_ratio/high_max": 0.009577864781022072, "clip_ratio/high_mean": 0.0025624341797083616, "clip_ratio/low_mean": 0.0030817617662250996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005644196178764105, "entropy": 0.5684835314750671, "epoch": 9.178082191780822, "grad_norm": 1.4794853941606732, "kl": 0.8781293630599976, "learning_rate": 2.7071917808219175e-07, "loss": 0.1409, "step": 1340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1665.0, "completions/max_terminated_length": 1665.0, "completions/mean_length": 180.7678680419922, "completions/mean_terminated_length": 180.7678680419922, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.003579020500183, "epoch": 9.184931506849315, "frac_reward_zero_std": 0.0, "grad_norm": 1.7734700619895158, "kl": 1.2578659057617188, "learning_rate": 2.7054794520547945e-07, "loss": 0.182, "num_tokens": 14376770.0, "reward": 0.9061779975891113, "reward_std": 0.06120239198207855, "rewards/check_gptzero_func/mean": 0.9061779379844666, "rewards/check_gptzero_func/std": 0.20563830435276031, "sampling/importance_sampling_ratio/max": 1.3691562414169312, "sampling/importance_sampling_ratio/mean": 1.0001251697540283, "sampling/importance_sampling_ratio/min": 0.7250327467918396, "sampling/sampling_logp_difference/max": 0.32153844833374023, "sampling/sampling_logp_difference/mean": 0.018347974866628647, "step": 1341 }, { "clip_ratio/high_max": 0.00863398052752018, "clip_ratio/high_mean": 0.002769938437268138, "clip_ratio/low_mean": 0.0020542291458696127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004824168048799038, "entropy": 1.0058127641677856, "epoch": 9.191780821917808, "grad_norm": 1.6462559732165218, "kl": 1.2159923315048218, "learning_rate": 2.7037671232876716e-07, "loss": 0.1739, "step": 1342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2260.0, "completions/mean_length": 278.0535888671875, "completions/mean_terminated_length": 228.56362915039062, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.8203477263450623, "epoch": 9.198630136986301, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.5090183770054437, "kl": 0.9883297681808472, "learning_rate": 2.702054794520548e-07, "loss": -0.0294, "num_tokens": 14397397.0, "reward": 0.9163076281547546, "reward_std": 0.05835948511958122, "rewards/check_gptzero_func/mean": 0.9163075685501099, "rewards/check_gptzero_func/std": 0.20327642560005188, "sampling/importance_sampling_ratio/max": 1.3660587072372437, "sampling/importance_sampling_ratio/mean": 0.9997978806495667, "sampling/importance_sampling_ratio/min": 0.5916305184364319, "sampling/sampling_logp_difference/max": 0.5248730182647705, "sampling/sampling_logp_difference/mean": 0.016597649082541466, "step": 1343 }, { "clip_ratio/high_max": 0.010834049433469772, "clip_ratio/high_mean": 0.0037861482705920935, "clip_ratio/low_mean": 0.0018491876544430852, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0056353360414505005, "entropy": 0.824930727481842, "epoch": 9.205479452054794, "grad_norm": 1.3072517186103019, "kl": 0.9752679467201233, "learning_rate": 2.7003424657534246e-07, "loss": -0.0363, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1686.0, "completions/max_terminated_length": 1686.0, "completions/mean_length": 129.08929443359375, "completions/mean_terminated_length": 129.08929443359375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 0.6991440057754517, "epoch": 9.212328767123287, "frac_reward_zero_std": 0.0, "grad_norm": 1.9537894875277328, "kl": 1.3601475954055786, "learning_rate": 2.698630136986301e-07, "loss": 0.0206, "num_tokens": 14408982.0, "reward": 0.9430986642837524, "reward_std": 0.048125918954610825, "rewards/check_gptzero_func/mean": 0.9430986046791077, "rewards/check_gptzero_func/std": 0.16484248638153076, "sampling/importance_sampling_ratio/max": 1.3735089302062988, "sampling/importance_sampling_ratio/mean": 0.999963104724884, "sampling/importance_sampling_ratio/min": 0.7218546271324158, "sampling/sampling_logp_difference/max": 0.3259315490722656, "sampling/sampling_logp_difference/mean": 0.01327479537576437, "step": 1345 }, { "clip_ratio/high_max": 0.016491327434778214, "clip_ratio/high_mean": 0.003056043293327093, "clip_ratio/low_mean": 0.0023705989588052034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005426642019301653, "entropy": 0.7004987001419067, "epoch": 9.219178082191782, "grad_norm": 1.7796798266817553, "kl": 1.360125184059143, "learning_rate": 2.696917808219178e-07, "loss": 0.0115, "step": 1346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1627.0, "completions/mean_length": 255.3035888671875, "completions/mean_terminated_length": 205.39999389648438, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 0.7573066353797913, "epoch": 9.226027397260275, "frac_reward_zero_std": 0.0, "grad_norm": 1.3736937997001917, "kl": 0.9322295784950256, "learning_rate": 2.6952054794520546e-07, "loss": 0.1637, "num_tokens": 14427913.0, "reward": 0.917445957660675, "reward_std": 0.041910815984010696, "rewards/check_gptzero_func/mean": 0.917445957660675, "rewards/check_gptzero_func/std": 0.18467053771018982, "sampling/importance_sampling_ratio/max": 1.2923831939697266, "sampling/importance_sampling_ratio/mean": 0.999913215637207, "sampling/importance_sampling_ratio/min": 0.7153558135032654, "sampling/sampling_logp_difference/max": 0.3349752426147461, "sampling/sampling_logp_difference/mean": 0.015340914018452168, "step": 1347 }, { "clip_ratio/high_max": 0.010873377323150635, "clip_ratio/high_mean": 0.003127981210127473, "clip_ratio/low_mean": 0.0027217164169996977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005849698092788458, "entropy": 0.7578312158584595, "epoch": 9.232876712328768, "grad_norm": 1.2636294815886704, "kl": 0.9294114708900452, "learning_rate": 2.693493150684931e-07, "loss": 0.1571, "step": 1348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1925.0, "completions/max_terminated_length": 1925.0, "completions/mean_length": 174.17857360839844, "completions/mean_terminated_length": 174.17857360839844, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 1.053117036819458, "epoch": 9.23972602739726, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.717773007597811, "kl": 1.1774959564208984, "learning_rate": 2.6917808219178076e-07, "loss": 0.0972, "num_tokens": 14442309.0, "reward": 0.9438005685806274, "reward_std": 0.014812279492616653, "rewards/check_gptzero_func/mean": 0.9438005089759827, "rewards/check_gptzero_func/std": 0.1697387844324112, "sampling/importance_sampling_ratio/max": 1.4619437456130981, "sampling/importance_sampling_ratio/mean": 0.9996242523193359, "sampling/importance_sampling_ratio/min": 0.7346083521842957, "sampling/sampling_logp_difference/max": 0.37976694107055664, "sampling/sampling_logp_difference/mean": 0.01779613271355629, "step": 1349 }, { "clip_ratio/high_max": 0.006365900859236717, "clip_ratio/high_mean": 0.0014246172504499555, "clip_ratio/low_mean": 0.001304986304603517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002729603787884116, "entropy": 1.0503884553909302, "epoch": 9.246575342465754, "grad_norm": 1.6641030027094403, "kl": 1.1680492162704468, "learning_rate": 2.690068493150685e-07, "loss": 0.0889, "step": 1350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2393.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 407.58929443359375, "completions/mean_terminated_length": 407.58929443359375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.149139404296875, "epoch": 9.253424657534246, "frac_reward_zero_std": 0.0, "grad_norm": 1.2550030701723915, "kl": 1.0541774034500122, "learning_rate": 2.6883561643835617e-07, "loss": 0.0221, "num_tokens": 14470020.0, "reward": 0.8451312780380249, "reward_std": 0.05753146857023239, "rewards/check_gptzero_func/mean": 0.8451312780380249, "rewards/check_gptzero_func/std": 0.2851499915122986, "sampling/importance_sampling_ratio/max": 1.4259978532791138, "sampling/importance_sampling_ratio/mean": 0.9996786117553711, "sampling/importance_sampling_ratio/min": 0.6259247660636902, "sampling/sampling_logp_difference/max": 0.4685251712799072, "sampling/sampling_logp_difference/mean": 0.021421749144792557, "step": 1351 }, { "clip_ratio/high_max": 0.00625451048836112, "clip_ratio/high_mean": 0.0018796289805322886, "clip_ratio/low_mean": 0.0017784461379051208, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036580751184374094, "entropy": 1.1481990814208984, "epoch": 9.26027397260274, "grad_norm": 1.1940811401946472, "kl": 1.0481994152069092, "learning_rate": 2.686643835616438e-07, "loss": 0.016, "step": 1352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1841.0, "completions/max_terminated_length": 1841.0, "completions/mean_length": 264.7321472167969, "completions/mean_terminated_length": 264.7321472167969, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.002379298210144, "epoch": 9.267123287671232, "frac_reward_zero_std": 0.0, "grad_norm": 1.5587855524859398, "kl": 0.980364203453064, "learning_rate": 2.684931506849315e-07, "loss": -0.0305, "num_tokens": 14489775.0, "reward": 0.8912916779518127, "reward_std": 0.0744038000702858, "rewards/check_gptzero_func/mean": 0.891291618347168, "rewards/check_gptzero_func/std": 0.23689621686935425, "sampling/importance_sampling_ratio/max": 1.3746087551116943, "sampling/importance_sampling_ratio/mean": 0.9997444748878479, "sampling/importance_sampling_ratio/min": 0.7398656010627747, "sampling/sampling_logp_difference/max": 0.31816911697387695, "sampling/sampling_logp_difference/mean": 0.019770845770835876, "step": 1353 }, { "clip_ratio/high_max": 0.00807319674640894, "clip_ratio/high_mean": 0.0034947183448821306, "clip_ratio/low_mean": 0.002076069824397564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005570788402110338, "entropy": 1.0032994747161865, "epoch": 9.273972602739725, "grad_norm": 1.4669973909141474, "kl": 0.9737931489944458, "learning_rate": 2.6832191780821917e-07, "loss": -0.0386, "step": 1354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1853.0, "completions/max_terminated_length": 1853.0, "completions/mean_length": 275.3035888671875, "completions/mean_terminated_length": 275.3035888671875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.9796693921089172, "epoch": 9.280821917808218, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.558451919266468, "kl": 0.8306617736816406, "learning_rate": 2.681506849315068e-07, "loss": 0.0609, "num_tokens": 14510180.0, "reward": 0.9219423532485962, "reward_std": 0.034495893865823746, "rewards/check_gptzero_func/mean": 0.9219422936439514, "rewards/check_gptzero_func/std": 0.1740301251411438, "sampling/importance_sampling_ratio/max": 1.5314035415649414, "sampling/importance_sampling_ratio/mean": 0.9996150135993958, "sampling/importance_sampling_ratio/min": 0.6855103969573975, "sampling/sampling_logp_difference/max": 0.42618465423583984, "sampling/sampling_logp_difference/mean": 0.019641850143671036, "step": 1355 }, { "clip_ratio/high_max": 0.007439553737640381, "clip_ratio/high_mean": 0.003474876983091235, "clip_ratio/low_mean": 0.003940482623875141, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.007415360305458307, "entropy": 0.9822201132774353, "epoch": 9.287671232876713, "grad_norm": 1.4309021408389826, "kl": 0.8288310766220093, "learning_rate": 2.679794520547945e-07, "loss": 0.0531, "step": 1356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 201.33929443359375, "completions/mean_terminated_length": 150.4545440673828, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.1284053325653076, "epoch": 9.294520547945206, "frac_reward_zero_std": 0.0, "grad_norm": 1.47930243795463, "kl": 1.1496626138687134, "learning_rate": 2.678082191780822e-07, "loss": 0.4619, "num_tokens": 14526221.0, "reward": 0.9486358165740967, "reward_std": 0.06756354123353958, "rewards/check_gptzero_func/mean": 0.9486357569694519, "rewards/check_gptzero_func/std": 0.16740909218788147, "sampling/importance_sampling_ratio/max": 1.2683945894241333, "sampling/importance_sampling_ratio/mean": 1.0000108480453491, "sampling/importance_sampling_ratio/min": 0.7839357256889343, "sampling/sampling_logp_difference/max": 0.24342823028564453, "sampling/sampling_logp_difference/mean": 0.018129384145140648, "step": 1357 }, { "clip_ratio/high_max": 0.0016181230312213302, "clip_ratio/high_mean": 0.00031996803591027856, "clip_ratio/low_mean": 0.0038875353056937456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004207503516227007, "entropy": 1.123429298400879, "epoch": 9.301369863013699, "grad_norm": 1.4038620981120584, "kl": 1.1322051286697388, "learning_rate": 2.6763698630136983e-07, "loss": 0.4549, "step": 1358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2043.0, "completions/max_terminated_length": 2043.0, "completions/mean_length": 275.3214416503906, "completions/mean_terminated_length": 275.3214416503906, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.8917425870895386, "epoch": 9.308219178082192, "frac_reward_zero_std": 0.0, "grad_norm": 1.4635746482792085, "kl": 1.2492421865463257, "learning_rate": 2.6746575342465753e-07, "loss": 0.0163, "num_tokens": 14546901.0, "reward": 0.8648799657821655, "reward_std": 0.056536488234996796, "rewards/check_gptzero_func/mean": 0.8648799657821655, "rewards/check_gptzero_func/std": 0.2625759541988373, "sampling/importance_sampling_ratio/max": 1.6757959127426147, "sampling/importance_sampling_ratio/mean": 0.9994097352027893, "sampling/importance_sampling_ratio/min": 0.7487884163856506, "sampling/sampling_logp_difference/max": 0.5162882804870605, "sampling/sampling_logp_difference/mean": 0.01781541109085083, "step": 1359 }, { "clip_ratio/high_max": 0.008057452738285065, "clip_ratio/high_mean": 0.0036224182695150375, "clip_ratio/low_mean": 0.0014823952224105597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005104813724756241, "entropy": 0.8902081847190857, "epoch": 9.315068493150685, "grad_norm": 1.3629129021091404, "kl": 1.2169790267944336, "learning_rate": 2.6729452054794523e-07, "loss": 0.009, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 1580.0, "completions/mean_length": 375.83929443359375, "completions/mean_terminated_length": 227.30189514160156, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.8829651474952698, "epoch": 9.321917808219178, "frac_reward_zero_std": 0.0, "grad_norm": 1.1250431359278368, "kl": 0.8212597966194153, "learning_rate": 2.671232876712329e-07, "loss": -0.0709, "num_tokens": 14572852.0, "reward": 0.919503927230835, "reward_std": 0.04632655531167984, "rewards/check_gptzero_func/mean": 0.9195038676261902, "rewards/check_gptzero_func/std": 0.1693253219127655, "sampling/importance_sampling_ratio/max": 1.3284192085266113, "sampling/importance_sampling_ratio/mean": 1.0002329349517822, "sampling/importance_sampling_ratio/min": 0.6255760788917542, "sampling/sampling_logp_difference/max": 0.4690823554992676, "sampling/sampling_logp_difference/mean": 0.015196479856967926, "step": 1361 }, { "clip_ratio/high_max": 0.007913279347121716, "clip_ratio/high_mean": 0.0027754215989261866, "clip_ratio/low_mean": 0.0010509921703487635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038264135364443064, "entropy": 0.8832412362098694, "epoch": 9.32876712328767, "grad_norm": 1.0722948965758785, "kl": 0.8206496834754944, "learning_rate": 2.6695205479452054e-07, "loss": -0.0762, "step": 1362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1759.0, "completions/max_terminated_length": 1759.0, "completions/mean_length": 194.35714721679688, "completions/mean_terminated_length": 194.35714721679688, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.8751682043075562, "epoch": 9.335616438356164, "frac_reward_zero_std": 0.0, "grad_norm": 1.813630124223485, "kl": 1.4110195636749268, "learning_rate": 2.667808219178082e-07, "loss": 0.0389, "num_tokens": 14588314.0, "reward": 0.9289838075637817, "reward_std": 0.030775422230362892, "rewards/check_gptzero_func/mean": 0.928983747959137, "rewards/check_gptzero_func/std": 0.19046729803085327, "sampling/importance_sampling_ratio/max": 1.3751955032348633, "sampling/importance_sampling_ratio/mean": 0.9991757273674011, "sampling/importance_sampling_ratio/min": 0.7500039935112, "sampling/sampling_logp_difference/max": 0.31859588623046875, "sampling/sampling_logp_difference/mean": 0.01740904152393341, "step": 1363 }, { "clip_ratio/high_max": 0.008385743945837021, "clip_ratio/high_mean": 0.0033020146656781435, "clip_ratio/low_mean": 0.0015190922422334552, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004821106791496277, "entropy": 0.8796876072883606, "epoch": 9.342465753424657, "grad_norm": 1.608822095255186, "kl": 1.4014347791671753, "learning_rate": 2.666095890410959e-07, "loss": 0.0307, "step": 1364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1849.0, "completions/mean_length": 256.5714416503906, "completions/mean_terminated_length": 206.69090270996094, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.8104845285415649, "epoch": 9.349315068493151, "frac_reward_zero_std": 0.0714285746216774, "grad_norm": 1.6454661610391936, "kl": 1.5268968343734741, "learning_rate": 2.6643835616438354e-07, "loss": 0.145, "num_tokens": 14607580.0, "reward": 0.9139314889907837, "reward_std": 0.04810062795877457, "rewards/check_gptzero_func/mean": 0.9139314293861389, "rewards/check_gptzero_func/std": 0.17393755912780762, "sampling/importance_sampling_ratio/max": 1.4262126684188843, "sampling/importance_sampling_ratio/mean": 0.9999089241027832, "sampling/importance_sampling_ratio/min": 0.7052912712097168, "sampling/sampling_logp_difference/max": 0.3550224304199219, "sampling/sampling_logp_difference/mean": 0.017151448875665665, "step": 1365 }, { "clip_ratio/high_max": 0.00527803972363472, "clip_ratio/high_mean": 0.001618848298676312, "clip_ratio/low_mean": 0.0018615896115079522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034804376773536205, "entropy": 0.8112525939941406, "epoch": 9.356164383561644, "grad_norm": 1.4196611154711483, "kl": 1.3508061170578003, "learning_rate": 2.662671232876712e-07, "loss": 0.1378, "step": 1366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1785.0, "completions/mean_length": 403.9821472167969, "completions/mean_terminated_length": 307.8333435058594, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.0043359994888306, "epoch": 9.363013698630137, "frac_reward_zero_std": 0.0, "grad_norm": 1.1811211396143673, "kl": 0.854326069355011, "learning_rate": 2.660958904109589e-07, "loss": -0.0055, "num_tokens": 14634805.0, "reward": 0.8043129444122314, "reward_std": 0.09207766503095627, "rewards/check_gptzero_func/mean": 0.8043128848075867, "rewards/check_gptzero_func/std": 0.3033338189125061, "sampling/importance_sampling_ratio/max": 1.5522594451904297, "sampling/importance_sampling_ratio/mean": 0.9998059868812561, "sampling/importance_sampling_ratio/min": 0.667952835559845, "sampling/sampling_logp_difference/max": 0.4397115707397461, "sampling/sampling_logp_difference/mean": 0.019935242831707, "step": 1367 }, { "clip_ratio/high_max": 0.007692307699471712, "clip_ratio/high_mean": 0.0027886219322681427, "clip_ratio/low_mean": 0.0015248057898133993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004313427489250898, "entropy": 1.0080045461654663, "epoch": 9.36986301369863, "grad_norm": 1.1326963405139954, "kl": 0.8551202416419983, "learning_rate": 2.659246575342466e-07, "loss": -0.0112, "step": 1368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 2096.0, "completions/mean_length": 409.8214416503906, "completions/mean_terminated_length": 263.2075500488281, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9092884063720703, "epoch": 9.376712328767123, "frac_reward_zero_std": 0.0, "grad_norm": 1.1588515522802283, "kl": 0.8780728578567505, "learning_rate": 2.6575342465753425e-07, "loss": 0.1005, "num_tokens": 14662485.0, "reward": 0.9208296537399292, "reward_std": 0.034535303711891174, "rewards/check_gptzero_func/mean": 0.9208296537399292, "rewards/check_gptzero_func/std": 0.16231469810009003, "sampling/importance_sampling_ratio/max": 1.6077884435653687, "sampling/importance_sampling_ratio/mean": 1.0002593994140625, "sampling/importance_sampling_ratio/min": 0.6334837079048157, "sampling/sampling_logp_difference/max": 0.4748595952987671, "sampling/sampling_logp_difference/mean": 0.02009156160056591, "step": 1369 }, { "clip_ratio/high_max": 0.006685768719762564, "clip_ratio/high_mean": 0.0028497709427028894, "clip_ratio/low_mean": 0.0024171422701328993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005266913212835789, "entropy": 0.9070634841918945, "epoch": 9.383561643835616, "grad_norm": 1.072499146279442, "kl": 0.8763235807418823, "learning_rate": 2.655821917808219e-07, "loss": 0.095, "step": 1370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 978.0, "completions/max_terminated_length": 978.0, "completions/mean_length": 100.76786041259766, "completions/mean_terminated_length": 100.76786041259766, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.8406945466995239, "epoch": 9.39041095890411, "frac_reward_zero_std": 0.0, "grad_norm": 1.801612628823567, "kl": 1.4690989255905151, "learning_rate": 2.654109589041096e-07, "loss": 0.0905, "num_tokens": 14672816.0, "reward": 0.9038491249084473, "reward_std": 0.03398123383522034, "rewards/check_gptzero_func/mean": 0.9038490653038025, "rewards/check_gptzero_func/std": 0.25251150131225586, "sampling/importance_sampling_ratio/max": 1.2680226564407349, "sampling/importance_sampling_ratio/mean": 0.9995384216308594, "sampling/importance_sampling_ratio/min": 0.7722291946411133, "sampling/sampling_logp_difference/max": 0.25847387313842773, "sampling/sampling_logp_difference/mean": 0.015668382868170738, "step": 1371 }, { "clip_ratio/high_max": 0.011686143465340137, "clip_ratio/high_mean": 0.003572136163711548, "clip_ratio/low_mean": 0.002651788294315338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006223924458026886, "entropy": 0.8451170325279236, "epoch": 9.397260273972602, "grad_norm": 1.6729114714461313, "kl": 1.4576222896575928, "learning_rate": 2.6523972602739725e-07, "loss": 0.0825, "step": 1372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1441.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 204.73214721679688, "completions/mean_terminated_length": 204.73214721679688, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.937428891658783, "epoch": 9.404109589041095, "frac_reward_zero_std": 0.0, "grad_norm": 1.7463334522596208, "kl": 1.4767341613769531, "learning_rate": 2.650684931506849e-07, "loss": 0.1431, "num_tokens": 14689091.0, "reward": 0.9105851650238037, "reward_std": 0.02915533445775509, "rewards/check_gptzero_func/mean": 0.9105851054191589, "rewards/check_gptzero_func/std": 0.23303788900375366, "sampling/importance_sampling_ratio/max": 1.525022268295288, "sampling/importance_sampling_ratio/mean": 1.0000407695770264, "sampling/importance_sampling_ratio/min": 0.7027527093887329, "sampling/sampling_logp_difference/max": 0.4220089912414551, "sampling/sampling_logp_difference/mean": 0.018550528213381767, "step": 1373 }, { "clip_ratio/high_max": 0.0072421785444021225, "clip_ratio/high_mean": 0.0019446488004177809, "clip_ratio/low_mean": 0.0015806348528712988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035252836532890797, "entropy": 0.939845085144043, "epoch": 9.41095890410959, "grad_norm": 1.6235269523495723, "kl": 1.3598377704620361, "learning_rate": 2.6489726027397255e-07, "loss": 0.1349, "step": 1374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 250.2678680419922, "completions/mean_terminated_length": 200.27272033691406, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1143755912780762, "epoch": 9.417808219178083, "frac_reward_zero_std": 0.0, "grad_norm": 1.440903615143169, "kl": 1.0509140491485596, "learning_rate": 2.6472602739726026e-07, "loss": 0.0871, "num_tokens": 14707744.0, "reward": 0.9065847396850586, "reward_std": 0.059161070734262466, "rewards/check_gptzero_func/mean": 0.9065846800804138, "rewards/check_gptzero_func/std": 0.24963879585266113, "sampling/importance_sampling_ratio/max": 1.6353034973144531, "sampling/importance_sampling_ratio/mean": 1.0001014471054077, "sampling/importance_sampling_ratio/min": 0.74715656042099, "sampling/sampling_logp_difference/max": 0.49182844161987305, "sampling/sampling_logp_difference/mean": 0.0188799686729908, "step": 1375 }, { "clip_ratio/high_max": 0.005745475646108389, "clip_ratio/high_mean": 0.0009908529464155436, "clip_ratio/low_mean": 0.0021822901908308268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031731431372463703, "entropy": 1.1163197755813599, "epoch": 9.424657534246576, "grad_norm": 1.3651536979502878, "kl": 1.0502574443817139, "learning_rate": 2.645547945205479e-07, "loss": 0.0807, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1912.0, "completions/max_terminated_length": 1912.0, "completions/mean_length": 274.1964416503906, "completions/mean_terminated_length": 274.1964416503906, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 1.153024435043335, "epoch": 9.431506849315069, "frac_reward_zero_std": 0.0, "grad_norm": 1.786206436514364, "kl": 1.3743699789047241, "learning_rate": 2.643835616438356e-07, "loss": 0.0601, "num_tokens": 14727569.0, "reward": 0.8947534561157227, "reward_std": 0.04246655851602554, "rewards/check_gptzero_func/mean": 0.8947533965110779, "rewards/check_gptzero_func/std": 0.2308875322341919, "sampling/importance_sampling_ratio/max": 1.3313496112823486, "sampling/importance_sampling_ratio/mean": 0.9991700053215027, "sampling/importance_sampling_ratio/min": 0.6968823671340942, "sampling/sampling_logp_difference/max": 0.36113858222961426, "sampling/sampling_logp_difference/mean": 0.021395282819867134, "step": 1377 }, { "clip_ratio/high_max": 0.007952285930514336, "clip_ratio/high_mean": 0.0028245989233255386, "clip_ratio/low_mean": 0.001672797603532672, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004497396759688854, "entropy": 1.1534652709960938, "epoch": 9.438356164383562, "grad_norm": 1.4693619524022477, "kl": 1.2799965143203735, "learning_rate": 2.642123287671233e-07, "loss": 0.0525, "step": 1378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 811.0, "completions/max_terminated_length": 811.0, "completions/mean_length": 123.67857360839844, "completions/mean_terminated_length": 123.67857360839844, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.9563754200935364, "epoch": 9.445205479452055, "frac_reward_zero_std": 0.0, "grad_norm": 1.8385835871767329, "kl": 1.6675233840942383, "learning_rate": 2.6404109589041096e-07, "loss": 0.0167, "num_tokens": 14739155.0, "reward": 0.9408008456230164, "reward_std": 0.04881976544857025, "rewards/check_gptzero_func/mean": 0.9408007860183716, "rewards/check_gptzero_func/std": 0.18910889327526093, "sampling/importance_sampling_ratio/max": 1.2892661094665527, "sampling/importance_sampling_ratio/mean": 0.9995827078819275, "sampling/importance_sampling_ratio/min": 0.6398302912712097, "sampling/sampling_logp_difference/max": 0.4465522766113281, "sampling/sampling_logp_difference/mean": 0.01751874014735222, "step": 1379 }, { "clip_ratio/high_max": 0.007905138656497002, "clip_ratio/high_mean": 0.001354277366772294, "clip_ratio/low_mean": 0.0018420673441141844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031963444780558348, "entropy": 0.9600535035133362, "epoch": 9.452054794520548, "grad_norm": 1.7025473150994839, "kl": 1.6335973739624023, "learning_rate": 2.638698630136986e-07, "loss": 0.0081, "step": 1380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1949.0, "completions/max_terminated_length": 1949.0, "completions/mean_length": 374.3214416503906, "completions/mean_terminated_length": 374.3214416503906, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.107720971107483, "epoch": 9.45890410958904, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.3834352053989767, "kl": 0.9016522169113159, "learning_rate": 2.6369863013698626e-07, "loss": 0.0232, "num_tokens": 14765325.0, "reward": 0.8812568187713623, "reward_std": 0.02848362922668457, "rewards/check_gptzero_func/mean": 0.8812567591667175, "rewards/check_gptzero_func/std": 0.21901671588420868, "sampling/importance_sampling_ratio/max": 1.5725682973861694, "sampling/importance_sampling_ratio/mean": 1.0000351667404175, "sampling/importance_sampling_ratio/min": 0.3068602383136749, "sampling/sampling_logp_difference/max": 1.1813628673553467, "sampling/sampling_logp_difference/mean": 0.022834988310933113, "step": 1381 }, { "clip_ratio/high_max": 0.006984866224229336, "clip_ratio/high_mean": 0.0037417118437588215, "clip_ratio/low_mean": 0.0019656303338706493, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0057073417119681835, "entropy": 1.1063148975372314, "epoch": 9.465753424657533, "grad_norm": 1.3076001561887884, "kl": 0.9019905924797058, "learning_rate": 2.6352739726027397e-07, "loss": 0.0164, "step": 1382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2222.0, "completions/max_terminated_length": 2222.0, "completions/mean_length": 249.8035888671875, "completions/mean_terminated_length": 249.8035888671875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.9762646555900574, "epoch": 9.472602739726028, "frac_reward_zero_std": 0.0, "grad_norm": 1.6028824455086486, "kl": 1.1547328233718872, "learning_rate": 2.633561643835616e-07, "loss": -0.0086, "num_tokens": 14784078.0, "reward": 0.899509608745575, "reward_std": 0.04437137767672539, "rewards/check_gptzero_func/mean": 0.8995095491409302, "rewards/check_gptzero_func/std": 0.24168173968791962, "sampling/importance_sampling_ratio/max": 1.460378885269165, "sampling/importance_sampling_ratio/mean": 1.0002858638763428, "sampling/importance_sampling_ratio/min": 0.6163398027420044, "sampling/sampling_logp_difference/max": 0.48395681381225586, "sampling/sampling_logp_difference/mean": 0.019542226567864418, "step": 1383 }, { "clip_ratio/high_max": 0.008150646463036537, "clip_ratio/high_mean": 0.0024376765359193087, "clip_ratio/low_mean": 0.0018794296775013208, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004317105747759342, "entropy": 0.9786529541015625, "epoch": 9.479452054794521, "grad_norm": 1.5054657343168794, "kl": 1.1497528553009033, "learning_rate": 2.6318493150684927e-07, "loss": -0.0168, "step": 1384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2024.0, "completions/mean_length": 592.732177734375, "completions/mean_terminated_length": 548.963623046875, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 1.1641218662261963, "epoch": 9.486301369863014, "frac_reward_zero_std": 0.0, "grad_norm": 1.1038066240702997, "kl": 0.7213659882545471, "learning_rate": 2.6301369863013697e-07, "loss": 0.0693, "num_tokens": 14822187.0, "reward": 0.8009081482887268, "reward_std": 0.07362709194421768, "rewards/check_gptzero_func/mean": 0.800908088684082, "rewards/check_gptzero_func/std": 0.2763315439224243, "sampling/importance_sampling_ratio/max": 1.5732800960540771, "sampling/importance_sampling_ratio/mean": 0.9997298121452332, "sampling/importance_sampling_ratio/min": 0.620299756526947, "sampling/sampling_logp_difference/max": 0.4775524139404297, "sampling/sampling_logp_difference/mean": 0.023750508204102516, "step": 1385 }, { "clip_ratio/high_max": 0.006057053338736296, "clip_ratio/high_mean": 0.002819449407979846, "clip_ratio/low_mean": 0.0018004737794399261, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0046199229545891285, "entropy": 1.166725993156433, "epoch": 9.493150684931507, "grad_norm": 1.0536305663845222, "kl": 0.7210549712181091, "learning_rate": 2.628424657534247e-07, "loss": 0.0638, "step": 1386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2234.0, "completions/max_terminated_length": 2234.0, "completions/mean_length": 419.6250305175781, "completions/mean_terminated_length": 419.6250305175781, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 1.155409812927246, "epoch": 9.5, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.6748594904739011, "kl": 0.803712010383606, "learning_rate": 2.626712328767123e-07, "loss": 0.0028, "num_tokens": 14850338.0, "reward": 0.8482601642608643, "reward_std": 0.08024868369102478, "rewards/check_gptzero_func/mean": 0.8482600450515747, "rewards/check_gptzero_func/std": 0.2725161612033844, "sampling/importance_sampling_ratio/max": 1.6087019443511963, "sampling/importance_sampling_ratio/mean": 0.999488890171051, "sampling/importance_sampling_ratio/min": 0.5483723282814026, "sampling/sampling_logp_difference/max": 0.6008007526397705, "sampling/sampling_logp_difference/mean": 0.02254335954785347, "step": 1387 }, { "clip_ratio/high_max": 0.00498575484380126, "clip_ratio/high_mean": 0.002703402191400528, "clip_ratio/low_mean": 0.0012389988405629992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003942400682717562, "entropy": 1.157683253288269, "epoch": 9.506849315068493, "grad_norm": 1.264057334065863, "kl": 0.8055601119995117, "learning_rate": 2.625e-07, "loss": -0.0031, "step": 1388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1619.0, "completions/max_terminated_length": 1619.0, "completions/mean_length": 170.83929443359375, "completions/mean_terminated_length": 170.83929443359375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.0039926767349243, "epoch": 9.513698630136986, "frac_reward_zero_std": 0.0, "grad_norm": 1.8086745531466983, "kl": 1.5916731357574463, "learning_rate": 2.623287671232877e-07, "loss": 0.0049, "num_tokens": 14864659.0, "reward": 0.9105579853057861, "reward_std": 0.07605908066034317, "rewards/check_gptzero_func/mean": 0.9105579257011414, "rewards/check_gptzero_func/std": 0.209796741604805, "sampling/importance_sampling_ratio/max": 1.2789795398712158, "sampling/importance_sampling_ratio/mean": 1.0000044107437134, "sampling/importance_sampling_ratio/min": 0.5341581702232361, "sampling/sampling_logp_difference/max": 0.6270632743835449, "sampling/sampling_logp_difference/mean": 0.01920340768992901, "step": 1389 }, { "clip_ratio/high_max": 0.008014571852982044, "clip_ratio/high_mean": 0.002831363817676902, "clip_ratio/low_mean": 0.0023576191160827875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005188982933759689, "entropy": 1.0031598806381226, "epoch": 9.520547945205479, "grad_norm": 1.7009588317381643, "kl": 1.4957735538482666, "learning_rate": 2.6215753424657533e-07, "loss": -0.004, "step": 1390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 349.0000305175781, "completions/mean_terminated_length": 300.79998779296875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1443856954574585, "epoch": 9.527397260273972, "frac_reward_zero_std": 0.0, "grad_norm": 1.1392721303935849, "kl": 0.8584296107292175, "learning_rate": 2.61986301369863e-07, "loss": 0.2585, "num_tokens": 14888863.0, "reward": 0.9130054712295532, "reward_std": 0.05018706992268562, "rewards/check_gptzero_func/mean": 0.9130054116249084, "rewards/check_gptzero_func/std": 0.17685328423976898, "sampling/importance_sampling_ratio/max": 1.6018469333648682, "sampling/importance_sampling_ratio/mean": 1.0006603002548218, "sampling/importance_sampling_ratio/min": 0.6961684823036194, "sampling/sampling_logp_difference/max": 0.4711573123931885, "sampling/sampling_logp_difference/mean": 0.02071814052760601, "step": 1391 }, { "clip_ratio/high_max": 0.008413461968302727, "clip_ratio/high_mean": 0.00253034639172256, "clip_ratio/low_mean": 0.0014224947663024068, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0039528412744402885, "entropy": 1.148284912109375, "epoch": 9.534246575342467, "grad_norm": 1.0960151520562582, "kl": 0.8606072664260864, "learning_rate": 2.618150684931507e-07, "loss": 0.2527, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2379.0, "completions/max_terminated_length": 2379.0, "completions/mean_length": 235.5178680419922, "completions/mean_terminated_length": 235.5178680419922, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 0.9770388007164001, "epoch": 9.54109589041096, "frac_reward_zero_std": 0.0, "grad_norm": 1.5352155680873496, "kl": 1.2996752262115479, "learning_rate": 2.6164383561643833e-07, "loss": -0.0157, "num_tokens": 14906554.0, "reward": 0.9285733699798584, "reward_std": 0.020626267418265343, "rewards/check_gptzero_func/mean": 0.9285733103752136, "rewards/check_gptzero_func/std": 0.1568596363067627, "sampling/importance_sampling_ratio/max": 1.4318219423294067, "sampling/importance_sampling_ratio/mean": 1.0003602504730225, "sampling/importance_sampling_ratio/min": 0.6579803228378296, "sampling/sampling_logp_difference/max": 0.4185802936553955, "sampling/sampling_logp_difference/mean": 0.01784709095954895, "step": 1393 }, { "clip_ratio/high_max": 0.006414368283003569, "clip_ratio/high_mean": 0.0028400938026607037, "clip_ratio/low_mean": 0.0010843125637620687, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003924406599253416, "entropy": 0.9766953587532043, "epoch": 9.547945205479452, "grad_norm": 1.4754886302738586, "kl": 1.2995141744613647, "learning_rate": 2.61472602739726e-07, "loss": -0.0228, "step": 1394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2014.0, "completions/max_terminated_length": 2014.0, "completions/mean_length": 288.75, "completions/mean_terminated_length": 288.75, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "entropy": 0.9568432569503784, "epoch": 9.554794520547945, "frac_reward_zero_std": 0.0, "grad_norm": 1.5104477253694526, "kl": 1.0946229696273804, "learning_rate": 2.613013698630137e-07, "loss": 0.0394, "num_tokens": 14927522.0, "reward": 0.9062114357948303, "reward_std": 0.05032261833548546, "rewards/check_gptzero_func/mean": 0.9062113761901855, "rewards/check_gptzero_func/std": 0.1820899248123169, "sampling/importance_sampling_ratio/max": 1.4177924394607544, "sampling/importance_sampling_ratio/mean": 1.0001733303070068, "sampling/importance_sampling_ratio/min": 0.7180240154266357, "sampling/sampling_logp_difference/max": 0.34910106658935547, "sampling/sampling_logp_difference/mean": 0.019135426729917526, "step": 1395 }, { "clip_ratio/high_max": 0.008749544620513916, "clip_ratio/high_mean": 0.0027410772163420916, "clip_ratio/low_mean": 0.0023800688795745373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005121145863085985, "entropy": 0.9579997658729553, "epoch": 9.561643835616438, "grad_norm": 1.405690373132313, "kl": 1.0898123979568481, "learning_rate": 2.611301369863014e-07, "loss": 0.0319, "step": 1396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1759.0, "completions/max_terminated_length": 1759.0, "completions/mean_length": 244.33929443359375, "completions/mean_terminated_length": 244.33929443359375, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 0.7651108503341675, "epoch": 9.568493150684931, "frac_reward_zero_std": 0.0, "grad_norm": 1.6195587179829218, "kl": 1.1555947065353394, "learning_rate": 2.6095890410958904e-07, "loss": 0.0774, "num_tokens": 14946291.0, "reward": 0.9110999703407288, "reward_std": 0.022992314770817757, "rewards/check_gptzero_func/mean": 0.9110999703407288, "rewards/check_gptzero_func/std": 0.19776198267936707, "sampling/importance_sampling_ratio/max": 1.318159580230713, "sampling/importance_sampling_ratio/mean": 0.9996622800827026, "sampling/importance_sampling_ratio/min": 0.6482728123664856, "sampling/sampling_logp_difference/max": 0.43344366550445557, "sampling/sampling_logp_difference/mean": 0.01610010676085949, "step": 1397 }, { "clip_ratio/high_max": 0.007682458497583866, "clip_ratio/high_mean": 0.0019183779368177056, "clip_ratio/low_mean": 0.0017600922146812081, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036784701514989138, "entropy": 0.7652413249015808, "epoch": 9.575342465753424, "grad_norm": 2.0979678399795247, "kl": 1.1521638631820679, "learning_rate": 2.607876712328767e-07, "loss": 0.0696, "step": 1398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1506.0, "completions/max_terminated_length": 1506.0, "completions/mean_length": 214.42857360839844, "completions/mean_terminated_length": 214.42857360839844, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.069984793663025, "epoch": 9.582191780821917, "frac_reward_zero_std": 0.0, "grad_norm": 1.7716891594863886, "kl": 1.1857030391693115, "learning_rate": 2.606164383561644e-07, "loss": 0.0641, "num_tokens": 14963407.0, "reward": 0.8900333046913147, "reward_std": 0.06933371722698212, "rewards/check_gptzero_func/mean": 0.8900332450866699, "rewards/check_gptzero_func/std": 0.23667624592781067, "sampling/importance_sampling_ratio/max": 1.3606739044189453, "sampling/importance_sampling_ratio/mean": 0.9997760653495789, "sampling/importance_sampling_ratio/min": 0.6104094386100769, "sampling/sampling_logp_difference/max": 0.4936254024505615, "sampling/sampling_logp_difference/mean": 0.02081414684653282, "step": 1399 }, { "clip_ratio/high_max": 0.011111111380159855, "clip_ratio/high_mean": 0.002786386990919709, "clip_ratio/low_mean": 0.0021820582915097475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004968445282429457, "entropy": 1.0710006952285767, "epoch": 9.58904109589041, "grad_norm": 1.5752346267728747, "kl": 1.177253007888794, "learning_rate": 2.6044520547945204e-07, "loss": 0.0563, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1837.0, "completions/max_terminated_length": 1837.0, "completions/mean_length": 292.58929443359375, "completions/mean_terminated_length": 292.58929443359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1289958953857422, "epoch": 9.595890410958905, "frac_reward_zero_std": 0.0, "grad_norm": 1.4596756230678771, "kl": 0.9635935425758362, "learning_rate": 2.602739726027397e-07, "loss": -0.0074, "num_tokens": 14984600.0, "reward": 0.8630415797233582, "reward_std": 0.09916158020496368, "rewards/check_gptzero_func/mean": 0.8630414605140686, "rewards/check_gptzero_func/std": 0.25767794251441956, "sampling/importance_sampling_ratio/max": 1.3801078796386719, "sampling/importance_sampling_ratio/mean": 1.0002976655960083, "sampling/importance_sampling_ratio/min": 0.6953194737434387, "sampling/sampling_logp_difference/max": 0.36338376998901367, "sampling/sampling_logp_difference/mean": 0.022033395245671272, "step": 1401 }, { "clip_ratio/high_max": 0.007805182598531246, "clip_ratio/high_mean": 0.003132264595478773, "clip_ratio/low_mean": 0.001586475525982678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004718740005046129, "entropy": 1.1303136348724365, "epoch": 9.602739726027398, "grad_norm": 1.3918104994660934, "kl": 0.9633308053016663, "learning_rate": 2.6010273972602735e-07, "loss": -0.0143, "step": 1402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 902.0, "completions/max_terminated_length": 902.0, "completions/mean_length": 124.64286041259766, "completions/mean_terminated_length": 124.64286041259766, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.9737656712532043, "epoch": 9.60958904109589, "frac_reward_zero_std": 0.0, "grad_norm": 1.6836515670984125, "kl": 1.397721529006958, "learning_rate": 2.5993150684931505e-07, "loss": 0.113, "num_tokens": 14996802.0, "reward": 0.9221227169036865, "reward_std": 0.08508063107728958, "rewards/check_gptzero_func/mean": 0.9221228361129761, "rewards/check_gptzero_func/std": 0.18547260761260986, "sampling/importance_sampling_ratio/max": 1.369289517402649, "sampling/importance_sampling_ratio/mean": 0.9994980692863464, "sampling/importance_sampling_ratio/min": 0.6622564196586609, "sampling/sampling_logp_difference/max": 0.41210246086120605, "sampling/sampling_logp_difference/mean": 0.01670142449438572, "step": 1403 }, { "clip_ratio/high_max": 0.004859086591750383, "clip_ratio/high_mean": 0.0012102697510272264, "clip_ratio/low_mean": 0.0018676629988476634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030779328662902117, "entropy": 0.9785604476928711, "epoch": 9.616438356164384, "grad_norm": 1.6056433336345388, "kl": 1.3789594173431396, "learning_rate": 2.5976027397260275e-07, "loss": 0.1056, "step": 1404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1917.0, "completions/max_terminated_length": 1917.0, "completions/mean_length": 242.87501525878906, "completions/mean_terminated_length": 242.87501525878906, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.9892719388008118, "epoch": 9.623287671232877, "frac_reward_zero_std": 0.0, "grad_norm": 1.7194394140899016, "kl": 1.170464277267456, "learning_rate": 2.595890410958904e-07, "loss": -0.0089, "num_tokens": 15015297.0, "reward": 0.907671332359314, "reward_std": 0.04609175771474838, "rewards/check_gptzero_func/mean": 0.907671332359314, "rewards/check_gptzero_func/std": 0.23140954971313477, "sampling/importance_sampling_ratio/max": 1.7206279039382935, "sampling/importance_sampling_ratio/mean": 1.0001839399337769, "sampling/importance_sampling_ratio/min": 0.6867730021476746, "sampling/sampling_logp_difference/max": 0.542689323425293, "sampling/sampling_logp_difference/mean": 0.020210882648825645, "step": 1405 }, { "clip_ratio/high_max": 0.008053318597376347, "clip_ratio/high_mean": 0.0031067978125065565, "clip_ratio/low_mean": 0.0021664374507963657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005273235496133566, "entropy": 0.9902086853981018, "epoch": 9.63013698630137, "grad_norm": 1.5652658236295016, "kl": 1.1550654172897339, "learning_rate": 2.594178082191781e-07, "loss": -0.0169, "step": 1406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 2755.0, "completions/mean_length": 476.58929443359375, "completions/mean_terminated_length": 383.129638671875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.1565240621566772, "epoch": 9.636986301369863, "frac_reward_zero_std": 0.0, "grad_norm": 1.1321083015089437, "kl": 1.0261908769607544, "learning_rate": 2.5924657534246576e-07, "loss": 0.0972, "num_tokens": 15047288.0, "reward": 0.8937277793884277, "reward_std": 0.041335947811603546, "rewards/check_gptzero_func/mean": 0.893727719783783, "rewards/check_gptzero_func/std": 0.19947275519371033, "sampling/importance_sampling_ratio/max": 1.6106815338134766, "sampling/importance_sampling_ratio/mean": 1.000704288482666, "sampling/importance_sampling_ratio/min": 0.2262713611125946, "sampling/sampling_logp_difference/max": 1.4860203266143799, "sampling/sampling_logp_difference/mean": 0.020101062953472137, "step": 1407 }, { "clip_ratio/high_max": 0.0046712183393538, "clip_ratio/high_mean": 0.0018366804579272866, "clip_ratio/low_mean": 0.0009134770953096449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027501576114445925, "entropy": 1.1565240621566772, "epoch": 9.643835616438356, "grad_norm": 1.0877473495044452, "kl": 1.0146063566207886, "learning_rate": 2.590753424657534e-07, "loss": 0.0919, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1773.0, "completions/max_terminated_length": 1773.0, "completions/mean_length": 211.57144165039062, "completions/mean_terminated_length": 211.57144165039062, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 1.1345653533935547, "epoch": 9.650684931506849, "frac_reward_zero_std": 0.0, "grad_norm": 1.6104853551112417, "kl": 1.194352149963379, "learning_rate": 2.5890410958904106e-07, "loss": 0.0836, "num_tokens": 15063880.0, "reward": 0.9073187112808228, "reward_std": 0.026706304401159286, "rewards/check_gptzero_func/mean": 0.9073187112808228, "rewards/check_gptzero_func/std": 0.22416795790195465, "sampling/importance_sampling_ratio/max": 1.602118968963623, "sampling/importance_sampling_ratio/mean": 1.0001075267791748, "sampling/importance_sampling_ratio/min": 0.49104610085487366, "sampling/sampling_logp_difference/max": 0.7112172842025757, "sampling/sampling_logp_difference/mean": 0.01872035674750805, "step": 1409 }, { "clip_ratio/high_max": 0.010888832621276379, "clip_ratio/high_mean": 0.0030627974774688482, "clip_ratio/low_mean": 0.0020623160526156425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005125113297253847, "entropy": 1.138878583908081, "epoch": 9.657534246575342, "grad_norm": 1.492576769734894, "kl": 1.1908527612686157, "learning_rate": 2.5873287671232876e-07, "loss": 0.0759, "step": 1410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2607.0, "completions/max_terminated_length": 2607.0, "completions/mean_length": 414.5000305175781, "completions/mean_terminated_length": 414.5000305175781, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "entropy": 1.3521229028701782, "epoch": 9.664383561643836, "frac_reward_zero_std": 0.0, "grad_norm": 1.3548192614758119, "kl": 1.0516834259033203, "learning_rate": 2.585616438356164e-07, "loss": -0.0022, "num_tokens": 15091880.0, "reward": 0.7861948609352112, "reward_std": 0.13555042445659637, "rewards/check_gptzero_func/mean": 0.7861947417259216, "rewards/check_gptzero_func/std": 0.31478214263916016, "sampling/importance_sampling_ratio/max": 1.609357237815857, "sampling/importance_sampling_ratio/mean": 0.999825656414032, "sampling/importance_sampling_ratio/min": 0.6512324213981628, "sampling/sampling_logp_difference/max": 0.47583484649658203, "sampling/sampling_logp_difference/mean": 0.025742115452885628, "step": 1411 }, { "clip_ratio/high_max": 0.005686433985829353, "clip_ratio/high_mean": 0.002888356801122427, "clip_ratio/low_mean": 0.0018108986550942063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004699255805462599, "entropy": 1.352936863899231, "epoch": 9.67123287671233, "grad_norm": 1.288999655340813, "kl": 1.0383093357086182, "learning_rate": 2.5839041095890406e-07, "loss": -0.0092, "step": 1412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2343.0, "completions/max_terminated_length": 2343.0, "completions/mean_length": 372.14288330078125, "completions/mean_terminated_length": 372.14288330078125, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.056127905845642, "epoch": 9.678082191780822, "frac_reward_zero_std": 0.0, "grad_norm": 1.3692621698472898, "kl": 1.2387373447418213, "learning_rate": 2.582191780821918e-07, "loss": -0.0007, "num_tokens": 15117446.0, "reward": 0.8733693957328796, "reward_std": 0.02047523856163025, "rewards/check_gptzero_func/mean": 0.8733693361282349, "rewards/check_gptzero_func/std": 0.26071783900260925, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9997625350952148, "sampling/importance_sampling_ratio/min": 0.6551734209060669, "sampling/sampling_logp_difference/max": 0.982750415802002, "sampling/sampling_logp_difference/mean": 0.020647641271352768, "step": 1413 }, { "clip_ratio/high_max": 0.006758512929081917, "clip_ratio/high_mean": 0.0029429644346237183, "clip_ratio/low_mean": 0.001730407471768558, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004673372022807598, "entropy": 1.0609776973724365, "epoch": 9.684931506849315, "grad_norm": 1.27517540725355, "kl": 1.2332675457000732, "learning_rate": 2.5804794520547947e-07, "loss": -0.0071, "step": 1414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1883.0, "completions/max_terminated_length": 1883.0, "completions/mean_length": 443.0535888671875, "completions/mean_terminated_length": 443.0535888671875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.102583885192871, "epoch": 9.691780821917808, "frac_reward_zero_std": 0.0, "grad_norm": 1.2728939641794517, "kl": 0.8885291218757629, "learning_rate": 2.578767123287671e-07, "loss": -0.0014, "num_tokens": 15147599.0, "reward": 0.8315231204032898, "reward_std": 0.06880464404821396, "rewards/check_gptzero_func/mean": 0.831523060798645, "rewards/check_gptzero_func/std": 0.2549401819705963, "sampling/importance_sampling_ratio/max": 1.7520670890808105, "sampling/importance_sampling_ratio/mean": 0.9999171495437622, "sampling/importance_sampling_ratio/min": 0.6516913175582886, "sampling/sampling_logp_difference/max": 0.5607962608337402, "sampling/sampling_logp_difference/mean": 0.022862358018755913, "step": 1415 }, { "clip_ratio/high_max": 0.005133747588843107, "clip_ratio/high_mean": 0.0027783194091171026, "clip_ratio/low_mean": 0.001795475254766643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004573795013129711, "entropy": 1.1055086851119995, "epoch": 9.698630136986301, "grad_norm": 1.2172409289077157, "kl": 0.8849741816520691, "learning_rate": 2.5770547945205477e-07, "loss": -0.0076, "step": 1416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1551.0, "completions/max_terminated_length": 1551.0, "completions/mean_length": 152.1428680419922, "completions/mean_terminated_length": 152.1428680419922, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.9532855153083801, "epoch": 9.705479452054794, "frac_reward_zero_std": 0.0, "grad_norm": 1.9022728251101075, "kl": 1.3703583478927612, "learning_rate": 2.5753424657534247e-07, "loss": 0.1307, "num_tokens": 15160917.0, "reward": 0.9646518230438232, "reward_std": 0.030680373311042786, "rewards/check_gptzero_func/mean": 0.9646517634391785, "rewards/check_gptzero_func/std": 0.13689830899238586, "sampling/importance_sampling_ratio/max": 1.4345238208770752, "sampling/importance_sampling_ratio/mean": 0.9999305605888367, "sampling/importance_sampling_ratio/min": 0.733511745929718, "sampling/sampling_logp_difference/max": 0.36083292961120605, "sampling/sampling_logp_difference/mean": 0.018292058259248734, "step": 1417 }, { "clip_ratio/high_max": 0.004910456482321024, "clip_ratio/high_mean": 0.0012340288376435637, "clip_ratio/low_mean": 0.003884332487359643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005118361208587885, "entropy": 0.9580321907997131, "epoch": 9.712328767123287, "grad_norm": 1.768655838084395, "kl": 1.3562262058258057, "learning_rate": 2.573630136986301e-07, "loss": 0.1219, "step": 1418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2076.0, "completions/max_terminated_length": 2076.0, "completions/mean_length": 259.83929443359375, "completions/mean_terminated_length": 259.83929443359375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1067036390304565, "epoch": 9.719178082191782, "frac_reward_zero_std": 0.0, "grad_norm": 1.6333797294566086, "kl": 1.3199996948242188, "learning_rate": 2.5719178082191777e-07, "loss": -0.0555, "num_tokens": 15180280.0, "reward": 0.9384117722511292, "reward_std": 0.017181791365146637, "rewards/check_gptzero_func/mean": 0.9384117126464844, "rewards/check_gptzero_func/std": 0.16213905811309814, "sampling/importance_sampling_ratio/max": 1.5277003049850464, "sampling/importance_sampling_ratio/mean": 1.00002920627594, "sampling/importance_sampling_ratio/min": 0.6757906675338745, "sampling/sampling_logp_difference/max": 0.4237635135650635, "sampling/sampling_logp_difference/mean": 0.021681275218725204, "step": 1419 }, { "clip_ratio/high_max": 0.009106933139264584, "clip_ratio/high_mean": 0.003551818896085024, "clip_ratio/low_mean": 0.0009474316029809415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004499250091612339, "entropy": 1.108869194984436, "epoch": 9.726027397260275, "grad_norm": 1.5582039884997962, "kl": 1.3148715496063232, "learning_rate": 2.570205479452054e-07, "loss": -0.0634, "step": 1420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2413.0, "completions/max_terminated_length": 2413.0, "completions/mean_length": 276.0714416503906, "completions/mean_terminated_length": 276.0714416503906, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.1914969682693481, "epoch": 9.732876712328768, "frac_reward_zero_std": 0.0, "grad_norm": 2.2539524534409545, "kl": 1.0870676040649414, "learning_rate": 2.568493150684932e-07, "loss": 0.1075, "num_tokens": 15200806.0, "reward": 0.8978019952774048, "reward_std": 0.04795648157596588, "rewards/check_gptzero_func/mean": 0.89780193567276, "rewards/check_gptzero_func/std": 0.21145838499069214, "sampling/importance_sampling_ratio/max": 1.7329427003860474, "sampling/importance_sampling_ratio/mean": 1.0008000135421753, "sampling/importance_sampling_ratio/min": 0.002740012714639306, "sampling/sampling_logp_difference/max": 5.899792671203613, "sampling/sampling_logp_difference/mean": 0.022600580006837845, "step": 1421 }, { "clip_ratio/high_max": 0.0036307054106146097, "clip_ratio/high_mean": 0.0019906170200556517, "clip_ratio/low_mean": 0.0014286014484241605, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034192183520644903, "entropy": 1.1936309337615967, "epoch": 9.73972602739726, "grad_norm": 1.4589582261533154, "kl": 1.0833308696746826, "learning_rate": 2.5667808219178083e-07, "loss": 0.1009, "step": 1422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1532.0, "completions/max_terminated_length": 1532.0, "completions/mean_length": 255.62501525878906, "completions/mean_terminated_length": 255.62501525878906, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.1789178848266602, "epoch": 9.746575342465754, "frac_reward_zero_std": 0.0, "grad_norm": 1.5253656147336432, "kl": 1.0128861665725708, "learning_rate": 2.565068493150685e-07, "loss": 0.0723, "num_tokens": 15220239.0, "reward": 0.9092686772346497, "reward_std": 0.05828340724110603, "rewards/check_gptzero_func/mean": 0.9092686772346497, "rewards/check_gptzero_func/std": 0.21616345643997192, "sampling/importance_sampling_ratio/max": 1.379501223564148, "sampling/importance_sampling_ratio/mean": 1.0005478858947754, "sampling/importance_sampling_ratio/min": 0.7046300172805786, "sampling/sampling_logp_difference/max": 0.3500823974609375, "sampling/sampling_logp_difference/mean": 0.021976377815008163, "step": 1423 }, { "clip_ratio/high_max": 0.006397441029548645, "clip_ratio/high_mean": 0.0026107190642505884, "clip_ratio/low_mean": 0.0025078454054892063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005118564236909151, "entropy": 1.1836427450180054, "epoch": 9.753424657534246, "grad_norm": 1.4511636556108203, "kl": 1.0130116939544678, "learning_rate": 2.563356164383562e-07, "loss": 0.0648, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 212.0357208251953, "completions/mean_terminated_length": 212.0357208251953, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 0.997289776802063, "epoch": 9.76027397260274, "frac_reward_zero_std": 0.0, "grad_norm": 1.6881548036117247, "kl": 1.0606328248977661, "learning_rate": 2.5616438356164383e-07, "loss": 0.0134, "num_tokens": 15237457.0, "reward": 0.9172711372375488, "reward_std": 0.04318443685770035, "rewards/check_gptzero_func/mean": 0.917271077632904, "rewards/check_gptzero_func/std": 0.2125130146741867, "sampling/importance_sampling_ratio/max": 1.7126144170761108, "sampling/importance_sampling_ratio/mean": 0.9998388290405273, "sampling/importance_sampling_ratio/min": 0.6972272396087646, "sampling/sampling_logp_difference/max": 0.5380210876464844, "sampling/sampling_logp_difference/mean": 0.019027013331651688, "step": 1425 }, { "clip_ratio/high_max": 0.007453786674886942, "clip_ratio/high_mean": 0.002336713019758463, "clip_ratio/low_mean": 0.0017807212425395846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004117433913052082, "entropy": 0.9969610571861267, "epoch": 9.767123287671232, "grad_norm": 1.5532472013976273, "kl": 1.0526098012924194, "learning_rate": 2.559931506849315e-07, "loss": 0.0061, "step": 1426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 150.73214721679688, "completions/mean_terminated_length": 150.73214721679688, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9885414838790894, "epoch": 9.773972602739725, "frac_reward_zero_std": 0.0, "grad_norm": 1.7818096852655991, "kl": 1.463369607925415, "learning_rate": 2.5582191780821913e-07, "loss": 0.0382, "num_tokens": 15251256.0, "reward": 0.9434444904327393, "reward_std": 0.03232480213046074, "rewards/check_gptzero_func/mean": 0.9434444308280945, "rewards/check_gptzero_func/std": 0.19306384027004242, "sampling/importance_sampling_ratio/max": 1.2881112098693848, "sampling/importance_sampling_ratio/mean": 1.0002018213272095, "sampling/importance_sampling_ratio/min": 0.679324209690094, "sampling/sampling_logp_difference/max": 0.3866567611694336, "sampling/sampling_logp_difference/mean": 0.01814597100019455, "step": 1427 }, { "clip_ratio/high_max": 0.01275781448930502, "clip_ratio/high_mean": 0.0023197263944894075, "clip_ratio/low_mean": 0.0016386708011850715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003958397079259157, "entropy": 0.9889878630638123, "epoch": 9.780821917808218, "grad_norm": 1.624497309176607, "kl": 1.4623976945877075, "learning_rate": 2.5565068493150684e-07, "loss": 0.0305, "step": 1428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1717.0, "completions/max_terminated_length": 1717.0, "completions/mean_length": 301.5535888671875, "completions/mean_terminated_length": 301.5535888671875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.3118315935134888, "epoch": 9.787671232876713, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.4383435716987645, "kl": 1.1060656309127808, "learning_rate": 2.554794520547945e-07, "loss": 0.0864, "num_tokens": 15272573.0, "reward": 0.923142671585083, "reward_std": 0.021400831639766693, "rewards/check_gptzero_func/mean": 0.9231426119804382, "rewards/check_gptzero_func/std": 0.1786538064479828, "sampling/importance_sampling_ratio/max": 1.373026967048645, "sampling/importance_sampling_ratio/mean": 1.000442385673523, "sampling/importance_sampling_ratio/min": 0.7016212344169617, "sampling/sampling_logp_difference/max": 0.35436153411865234, "sampling/sampling_logp_difference/mean": 0.022149469703435898, "step": 1429 }, { "clip_ratio/high_max": 0.005635390058159828, "clip_ratio/high_mean": 0.0016684596193954349, "clip_ratio/low_mean": 0.0018061947776004672, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003474654396995902, "entropy": 1.3127316236495972, "epoch": 9.794520547945206, "grad_norm": 1.3617105272427714, "kl": 1.1057935953140259, "learning_rate": 2.553082191780822e-07, "loss": 0.0792, "step": 1430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1765.0, "completions/max_terminated_length": 1765.0, "completions/mean_length": 265.4464416503906, "completions/mean_terminated_length": 265.4464416503906, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0812252759933472, "epoch": 9.801369863013699, "frac_reward_zero_std": 0.0, "grad_norm": 1.5529833386994614, "kl": 1.162770390510559, "learning_rate": 2.551369863013699e-07, "loss": 0.0859, "num_tokens": 15292400.0, "reward": 0.9183884263038635, "reward_std": 0.05761200189590454, "rewards/check_gptzero_func/mean": 0.9183883666992188, "rewards/check_gptzero_func/std": 0.15809200704097748, "sampling/importance_sampling_ratio/max": 1.5011377334594727, "sampling/importance_sampling_ratio/mean": 1.0001963376998901, "sampling/importance_sampling_ratio/min": 0.6269863843917847, "sampling/sampling_logp_difference/max": 0.4668304920196533, "sampling/sampling_logp_difference/mean": 0.020379528403282166, "step": 1431 }, { "clip_ratio/high_max": 0.005988024175167084, "clip_ratio/high_mean": 0.0021763197146356106, "clip_ratio/low_mean": 0.0012875247048214078, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00346384453587234, "entropy": 1.082451581954956, "epoch": 9.808219178082192, "grad_norm": 1.4674408453479009, "kl": 1.1657251119613647, "learning_rate": 2.5496575342465755e-07, "loss": 0.0791, "step": 1432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1813.0, "completions/mean_length": 401.64288330078125, "completions/mean_terminated_length": 354.3999938964844, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.0370060205459595, "epoch": 9.815068493150685, "frac_reward_zero_std": 0.0, "grad_norm": 1.2180419324973852, "kl": 1.1465564966201782, "learning_rate": 2.547945205479452e-07, "loss": 0.1355, "num_tokens": 15319476.0, "reward": 0.8939787149429321, "reward_std": 0.04487249627709389, "rewards/check_gptzero_func/mean": 0.8939787149429321, "rewards/check_gptzero_func/std": 0.18943597376346588, "sampling/importance_sampling_ratio/max": 1.5993629693984985, "sampling/importance_sampling_ratio/mean": 1.000601887702942, "sampling/importance_sampling_ratio/min": 0.6862673759460449, "sampling/sampling_logp_difference/max": 0.4696054458618164, "sampling/sampling_logp_difference/mean": 0.0198662132024765, "step": 1433 }, { "clip_ratio/high_max": 0.007666666526347399, "clip_ratio/high_mean": 0.0027639451436698437, "clip_ratio/low_mean": 0.0011148040648549795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003878748742863536, "entropy": 1.0390366315841675, "epoch": 9.821917808219178, "grad_norm": 1.1547354657568314, "kl": 1.1438264846801758, "learning_rate": 2.5462328767123285e-07, "loss": 0.1295, "step": 1434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1723.0, "completions/max_terminated_length": 1723.0, "completions/mean_length": 284.7857360839844, "completions/mean_terminated_length": 284.7857360839844, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9580947160720825, "epoch": 9.82876712328767, "frac_reward_zero_std": 0.0, "grad_norm": 1.4733705628785647, "kl": 1.2961294651031494, "learning_rate": 2.5445205479452055e-07, "loss": 0.0042, "num_tokens": 15340088.0, "reward": 0.9072210192680359, "reward_std": 0.04625297710299492, "rewards/check_gptzero_func/mean": 0.9072209596633911, "rewards/check_gptzero_func/std": 0.20441240072250366, "sampling/importance_sampling_ratio/max": 1.6981676816940308, "sampling/importance_sampling_ratio/mean": 0.9990749359130859, "sampling/importance_sampling_ratio/min": 0.6244409084320068, "sampling/sampling_logp_difference/max": 0.5295498371124268, "sampling/sampling_logp_difference/mean": 0.020888470113277435, "step": 1435 }, { "clip_ratio/high_max": 0.008029197342693806, "clip_ratio/high_mean": 0.0041755870915949345, "clip_ratio/low_mean": 0.0022127609699964523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.006388348061591387, "entropy": 0.9603762030601501, "epoch": 9.835616438356164, "grad_norm": 1.348942571829499, "kl": 1.2918779850006104, "learning_rate": 2.542808219178082e-07, "loss": -0.003, "step": 1436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2125.0, "completions/max_terminated_length": 2125.0, "completions/mean_length": 220.4285888671875, "completions/mean_terminated_length": 220.4285888671875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.2822521924972534, "epoch": 9.842465753424658, "frac_reward_zero_std": 0.0, "grad_norm": 1.3409478203844767, "kl": 1.2594108581542969, "learning_rate": 2.5410958904109585e-07, "loss": 0.1339, "num_tokens": 15357622.0, "reward": 0.9684365391731262, "reward_std": 0.03333248943090439, "rewards/check_gptzero_func/mean": 0.9684365391731262, "rewards/check_gptzero_func/std": 0.0996471419930458, "sampling/importance_sampling_ratio/max": 1.4423853158950806, "sampling/importance_sampling_ratio/mean": 0.9999983906745911, "sampling/importance_sampling_ratio/min": 0.7599166035652161, "sampling/sampling_logp_difference/max": 0.36629819869995117, "sampling/sampling_logp_difference/mean": 0.019996026530861855, "step": 1437 }, { "clip_ratio/high_max": 0.006200909614562988, "clip_ratio/high_mean": 0.0008858442306518555, "clip_ratio/low_mean": 0.00046377856051549315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013496227329596877, "entropy": 1.2843066453933716, "epoch": 9.849315068493151, "grad_norm": 1.2911198989113066, "kl": 1.2583686113357544, "learning_rate": 2.5393835616438355e-07, "loss": 0.1283, "step": 1438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2469.0, "completions/max_terminated_length": 2469.0, "completions/mean_length": 450.607177734375, "completions/mean_terminated_length": 450.607177734375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.3099414110183716, "epoch": 9.856164383561644, "frac_reward_zero_std": 0.0, "grad_norm": 1.2377446806776493, "kl": 1.3050967454910278, "learning_rate": 2.5376712328767126e-07, "loss": 0.1202, "num_tokens": 15388032.0, "reward": 0.8928235173225403, "reward_std": 0.058637890964746475, "rewards/check_gptzero_func/mean": 0.8928235173225403, "rewards/check_gptzero_func/std": 0.20635220408439636, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 1.0006285905838013, "sampling/importance_sampling_ratio/min": 0.5969772338867188, "sampling/sampling_logp_difference/max": 0.7179906368255615, "sampling/sampling_logp_difference/mean": 0.02195432409644127, "step": 1439 }, { "clip_ratio/high_max": 0.006301991641521454, "clip_ratio/high_mean": 0.0023896493948996067, "clip_ratio/low_mean": 0.002020488725975156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004410137888044119, "entropy": 1.3103046417236328, "epoch": 9.863013698630137, "grad_norm": 1.2471191070755792, "kl": 1.1190407276153564, "learning_rate": 2.535958904109589e-07, "loss": 0.1145, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2170.0, "completions/mean_length": 321.8214416503906, "completions/mean_terminated_length": 273.12725830078125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.490759015083313, "epoch": 9.86986301369863, "frac_reward_zero_std": 0.0, "grad_norm": 1.271413928316657, "kl": 1.0541504621505737, "learning_rate": 2.5342465753424656e-07, "loss": 0.038, "num_tokens": 15411470.0, "reward": 0.9064174890518188, "reward_std": 0.06500785052776337, "rewards/check_gptzero_func/mean": 0.9064174294471741, "rewards/check_gptzero_func/std": 0.19751575589179993, "sampling/importance_sampling_ratio/max": 1.427000880241394, "sampling/importance_sampling_ratio/mean": 1.0005131959915161, "sampling/importance_sampling_ratio/min": 0.700387179851532, "sampling/sampling_logp_difference/max": 0.3561220169067383, "sampling/sampling_logp_difference/mean": 0.02272987738251686, "step": 1441 }, { "clip_ratio/high_max": 0.004411935340613127, "clip_ratio/high_mean": 0.0006665530381724238, "clip_ratio/low_mean": 0.001045767217874527, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017123203724622726, "entropy": 1.492387056350708, "epoch": 9.876712328767123, "grad_norm": 1.2175618384953892, "kl": 1.0517804622650146, "learning_rate": 2.5325342465753426e-07, "loss": 0.0323, "step": 1442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 201.3928680419922, "completions/mean_terminated_length": 201.3928680419922, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.2251689434051514, "epoch": 9.883561643835616, "frac_reward_zero_std": 0.0, "grad_norm": 1.4750303094228574, "kl": 1.255724549293518, "learning_rate": 2.530821917808219e-07, "loss": 0.2324, "num_tokens": 15427942.0, "reward": 0.9697452783584595, "reward_std": 0.017120331525802612, "rewards/check_gptzero_func/mean": 0.9697452187538147, "rewards/check_gptzero_func/std": 0.11828415095806122, "sampling/importance_sampling_ratio/max": 1.377323031425476, "sampling/importance_sampling_ratio/mean": 1.0000501871109009, "sampling/importance_sampling_ratio/min": 0.7687540054321289, "sampling/sampling_logp_difference/max": 0.3201417922973633, "sampling/sampling_logp_difference/mean": 0.020079243928194046, "step": 1443 }, { "clip_ratio/high_max": 0.01173613965511322, "clip_ratio/high_mean": 0.0024586825165897608, "clip_ratio/low_mean": 0.0013829681556671858, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038416502065956593, "entropy": 1.2303087711334229, "epoch": 9.89041095890411, "grad_norm": 1.3858779533837593, "kl": 1.2365690469741821, "learning_rate": 2.5291095890410956e-07, "loss": 0.2263, "step": 1444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2184.0, "completions/mean_length": 306.2321472167969, "completions/mean_terminated_length": 257.2545471191406, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.1450512409210205, "epoch": 9.897260273972602, "frac_reward_zero_std": 0.0, "grad_norm": 1.4486531547924013, "kl": 1.446217656135559, "learning_rate": 2.527397260273972e-07, "loss": 0.1146, "num_tokens": 15449545.0, "reward": 0.9435932040214539, "reward_std": 0.015912430360913277, "rewards/check_gptzero_func/mean": 0.9435931444168091, "rewards/check_gptzero_func/std": 0.1400681585073471, "sampling/importance_sampling_ratio/max": 1.600963830947876, "sampling/importance_sampling_ratio/mean": 1.000573754310608, "sampling/importance_sampling_ratio/min": 0.3935392498970032, "sampling/sampling_logp_difference/max": 0.9325745105743408, "sampling/sampling_logp_difference/mean": 0.02135538123548031, "step": 1445 }, { "clip_ratio/high_max": 0.004519978072494268, "clip_ratio/high_mean": 0.0023444348480552435, "clip_ratio/low_mean": 0.0019198006484657526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004264235030859709, "entropy": 1.1458778381347656, "epoch": 9.904109589041095, "grad_norm": 1.362641679785204, "kl": 1.4436146020889282, "learning_rate": 2.525684931506849e-07, "loss": 0.1075, "step": 1446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1670.0, "completions/mean_length": 249.7678680419922, "completions/mean_terminated_length": 199.7636260986328, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 1.2522860765457153, "epoch": 9.91095890410959, "frac_reward_zero_std": 0.0, "grad_norm": 1.4008853969760775, "kl": 1.067325472831726, "learning_rate": 2.5239726027397257e-07, "loss": 0.2649, "num_tokens": 15468808.0, "reward": 0.9062883257865906, "reward_std": 0.08452210575342178, "rewards/check_gptzero_func/mean": 0.9062882661819458, "rewards/check_gptzero_func/std": 0.2327657788991928, "sampling/importance_sampling_ratio/max": 1.276204228401184, "sampling/importance_sampling_ratio/mean": 0.9997257590293884, "sampling/importance_sampling_ratio/min": 0.6411099433898926, "sampling/sampling_logp_difference/max": 0.44455432891845703, "sampling/sampling_logp_difference/mean": 0.020197784528136253, "step": 1447 }, { "clip_ratio/high_max": 0.009986130520701408, "clip_ratio/high_mean": 0.0029276444111019373, "clip_ratio/low_mean": 0.0016425523208454251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004570196382701397, "entropy": 1.25679612159729, "epoch": 9.917808219178083, "grad_norm": 1.2958801380424043, "kl": 1.0663729906082153, "learning_rate": 2.5222602739726027e-07, "loss": 0.259, "step": 1448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1519.0, "completions/max_terminated_length": 1519.0, "completions/mean_length": 224.50001525878906, "completions/mean_terminated_length": 224.50001525878906, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.2706252336502075, "epoch": 9.924657534246576, "frac_reward_zero_std": 0.0, "grad_norm": 1.617276612859236, "kl": 1.1282575130462646, "learning_rate": 2.5205479452054797e-07, "loss": 0.0033, "num_tokens": 15486252.0, "reward": 0.9259912371635437, "reward_std": 0.04244620352983475, "rewards/check_gptzero_func/mean": 0.9259911775588989, "rewards/check_gptzero_func/std": 0.16225509345531464, "sampling/importance_sampling_ratio/max": 1.6136187314987183, "sampling/importance_sampling_ratio/mean": 0.9991370439529419, "sampling/importance_sampling_ratio/min": 0.42346930503845215, "sampling/sampling_logp_difference/max": 0.8592742681503296, "sampling/sampling_logp_difference/mean": 0.02199472300708294, "step": 1449 }, { "clip_ratio/high_max": 0.006975932978093624, "clip_ratio/high_mean": 0.0024929754436016083, "clip_ratio/low_mean": 0.0018126250943168998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00430560065433383, "entropy": 1.2722625732421875, "epoch": 9.931506849315069, "grad_norm": 1.5062296871707168, "kl": 1.1128932237625122, "learning_rate": 2.518835616438356e-07, "loss": -0.0035, "step": 1450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 408.8571472167969, "completions/mean_terminated_length": 361.7454528808594, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.4209321737289429, "epoch": 9.938356164383562, "frac_reward_zero_std": 0.0, "grad_norm": 1.2286698646102188, "kl": 0.9925192594528198, "learning_rate": 2.5171232876712327e-07, "loss": 0.0844, "num_tokens": 15514134.0, "reward": 0.910292387008667, "reward_std": 0.03567984700202942, "rewards/check_gptzero_func/mean": 0.9102923274040222, "rewards/check_gptzero_func/std": 0.18021593987941742, "sampling/importance_sampling_ratio/max": 1.3280954360961914, "sampling/importance_sampling_ratio/mean": 1.0003074407577515, "sampling/importance_sampling_ratio/min": 0.6439591646194458, "sampling/sampling_logp_difference/max": 0.44011998176574707, "sampling/sampling_logp_difference/mean": 0.023345811292529106, "step": 1451 }, { "clip_ratio/high_max": 0.005062126088887453, "clip_ratio/high_mean": 0.0016149774892255664, "clip_ratio/low_mean": 0.0011762710055336356, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027912482619285583, "entropy": 1.4190104007720947, "epoch": 9.945205479452055, "grad_norm": 1.184521436646489, "kl": 0.9932288527488708, "learning_rate": 2.515410958904109e-07, "loss": 0.0783, "step": 1452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1686.0, "completions/max_terminated_length": 1686.0, "completions/mean_length": 235.6785888671875, "completions/mean_terminated_length": 235.6785888671875, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 1.2292872667312622, "epoch": 9.952054794520548, "frac_reward_zero_std": 0.0, "grad_norm": 1.5531577295132102, "kl": 1.2380049228668213, "learning_rate": 2.5136986301369863e-07, "loss": 0.0439, "num_tokens": 15532142.0, "reward": 0.9032156467437744, "reward_std": 0.0683407112956047, "rewards/check_gptzero_func/mean": 0.9032155275344849, "rewards/check_gptzero_func/std": 0.23232004046440125, "sampling/importance_sampling_ratio/max": 1.388127088546753, "sampling/importance_sampling_ratio/mean": 0.9997185468673706, "sampling/importance_sampling_ratio/min": 0.7277857661247253, "sampling/sampling_logp_difference/max": 0.3279554843902588, "sampling/sampling_logp_difference/mean": 0.02201240137219429, "step": 1453 }, { "clip_ratio/high_max": 0.007855459116399288, "clip_ratio/high_mean": 0.0028680837713181973, "clip_ratio/low_mean": 0.0016226153820753098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004490699153393507, "entropy": 1.2309743165969849, "epoch": 9.95890410958904, "grad_norm": 1.4612575845142475, "kl": 1.2394782304763794, "learning_rate": 2.511986301369863e-07, "loss": 0.0366, "step": 1454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1719.0, "completions/max_terminated_length": 1719.0, "completions/mean_length": 309.5, "completions/mean_terminated_length": 309.5, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 1.2678884267807007, "epoch": 9.965753424657533, "frac_reward_zero_std": 0.0, "grad_norm": 1.4614187541521955, "kl": 0.9550915360450745, "learning_rate": 2.5102739726027393e-07, "loss": -0.0251, "num_tokens": 15554098.0, "reward": 0.8969007134437561, "reward_std": 0.04594361037015915, "rewards/check_gptzero_func/mean": 0.8969006538391113, "rewards/check_gptzero_func/std": 0.21292585134506226, "sampling/importance_sampling_ratio/max": 1.6039454936981201, "sampling/importance_sampling_ratio/mean": 0.9999504089355469, "sampling/importance_sampling_ratio/min": 0.6167558431625366, "sampling/sampling_logp_difference/max": 0.48328208923339844, "sampling/sampling_logp_difference/mean": 0.02408992312848568, "step": 1455 }, { "clip_ratio/high_max": 0.008890606462955475, "clip_ratio/high_mean": 0.003970824647694826, "clip_ratio/low_mean": 0.0018850441556423903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00585586903616786, "entropy": 1.2670049667358398, "epoch": 9.972602739726028, "grad_norm": 1.384770045202042, "kl": 0.952642023563385, "learning_rate": 2.5085616438356163e-07, "loss": -0.0322, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 353.7321472167969, "completions/mean_terminated_length": 305.6181640625, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.3351001739501953, "epoch": 9.979452054794521, "frac_reward_zero_std": 0.0, "grad_norm": 1.2270325684306467, "kl": 1.1592473983764648, "learning_rate": 2.5068493150684933e-07, "loss": -0.0087, "num_tokens": 15579569.0, "reward": 0.9157103300094604, "reward_std": 0.06061366945505142, "rewards/check_gptzero_func/mean": 0.9157103896141052, "rewards/check_gptzero_func/std": 0.19152306020259857, "sampling/importance_sampling_ratio/max": 1.3684076070785522, "sampling/importance_sampling_ratio/mean": 1.0003942251205444, "sampling/importance_sampling_ratio/min": 0.7082242369651794, "sampling/sampling_logp_difference/max": 0.34499454498291016, "sampling/sampling_logp_difference/mean": 0.021402915939688683, "step": 1457 }, { "clip_ratio/high_max": 0.0032786885276436806, "clip_ratio/high_mean": 0.0013424912467598915, "clip_ratio/low_mean": 0.0008078543469309807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002150345593690872, "entropy": 1.3314560651779175, "epoch": 9.986301369863014, "grad_norm": 1.1880373273337679, "kl": 1.1597105264663696, "learning_rate": 2.50513698630137e-07, "loss": -0.0142, "step": 1458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2019.0, "completions/max_terminated_length": 2019.0, "completions/mean_length": 296.7321472167969, "completions/mean_terminated_length": 296.7321472167969, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.3927936553955078, "epoch": 9.993150684931507, "frac_reward_zero_std": 0.0, "grad_norm": 2.58683115947069, "kl": 1.1007639169692993, "learning_rate": 2.5034246575342463e-07, "loss": -0.0958, "num_tokens": 15601370.0, "reward": 0.9006994962692261, "reward_std": 0.054121263325214386, "rewards/check_gptzero_func/mean": 0.9006994962692261, "rewards/check_gptzero_func/std": 0.22301049530506134, "sampling/importance_sampling_ratio/max": 1.2674070596694946, "sampling/importance_sampling_ratio/mean": 0.9999262690544128, "sampling/importance_sampling_ratio/min": 0.7321202754974365, "sampling/sampling_logp_difference/max": 0.3118104934692383, "sampling/sampling_logp_difference/mean": 0.023006876930594444, "step": 1459 }, { "clip_ratio/high_max": 0.0035536603536456823, "clip_ratio/high_mean": 0.0015895651886239648, "clip_ratio/low_mean": 0.000697009323630482, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022865745704621077, "entropy": 1.3948320150375366, "epoch": 10.0, "grad_norm": 1.325950318750705, "kl": 1.0811846256256104, "learning_rate": 2.5017123287671234e-07, "loss": -0.101, "step": 1460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1563.0, "completions/mean_length": 357.4107360839844, "completions/mean_terminated_length": 259.53704833984375, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 1.496145486831665, "epoch": 10.006849315068493, "frac_reward_zero_std": 0.0, "grad_norm": 1.233702393356037, "kl": 0.9464890360832214, "learning_rate": 2.5e-07, "loss": -0.1854, "num_tokens": 15626805.0, "reward": 0.8783512711524963, "reward_std": 0.0682065337896347, "rewards/check_gptzero_func/mean": 0.8783512115478516, "rewards/check_gptzero_func/std": 0.24946768581867218, "sampling/importance_sampling_ratio/max": 1.370897889137268, "sampling/importance_sampling_ratio/mean": 0.9998508095741272, "sampling/importance_sampling_ratio/min": 0.6888173818588257, "sampling/sampling_logp_difference/max": 0.37277913093566895, "sampling/sampling_logp_difference/mean": 0.02120564691722393, "step": 1461 }, { "clip_ratio/high_max": 0.005093919113278389, "clip_ratio/high_mean": 0.001304483856074512, "clip_ratio/low_mean": 0.0009653771994635463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022698608227074146, "entropy": 1.4956262111663818, "epoch": 10.013698630136986, "grad_norm": 1.189261579232988, "kl": 0.9437350034713745, "learning_rate": 2.4982876712328764e-07, "loss": -0.191, "step": 1462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1902.0, "completions/mean_length": 331.96429443359375, "completions/mean_terminated_length": 283.45452880859375, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 1.3571561574935913, "epoch": 10.020547945205479, "frac_reward_zero_std": 0.0, "grad_norm": 2.825528432995311, "kl": 1.1960216760635376, "learning_rate": 2.4965753424657534e-07, "loss": 0.0786, "num_tokens": 15650035.0, "reward": 0.9027348160743713, "reward_std": 0.07587388902902603, "rewards/check_gptzero_func/mean": 0.9027347564697266, "rewards/check_gptzero_func/std": 0.24152393639087677, "sampling/importance_sampling_ratio/max": 1.4100407361984253, "sampling/importance_sampling_ratio/mean": 0.9998384118080139, "sampling/importance_sampling_ratio/min": 0.49492332339286804, "sampling/sampling_logp_difference/max": 0.7033524513244629, "sampling/sampling_logp_difference/mean": 0.021210815757513046, "step": 1463 }, { "clip_ratio/high_max": 0.0033957716077566147, "clip_ratio/high_mean": 0.0008631391683593392, "clip_ratio/low_mean": 0.001054821303114295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019179603550583124, "entropy": 1.3580573797225952, "epoch": 10.027397260273972, "grad_norm": 1.3713965012591234, "kl": 1.181427001953125, "learning_rate": 2.49486301369863e-07, "loss": 0.0733, "step": 1464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2601.0, "completions/max_terminated_length": 2601.0, "completions/mean_length": 370.6250305175781, "completions/mean_terminated_length": 370.6250305175781, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.4494389295578003, "epoch": 10.034246575342467, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.326263086652298, "kl": 1.110225796699524, "learning_rate": 2.493150684931507e-07, "loss": 0.0888, "num_tokens": 15675888.0, "reward": 0.9292023181915283, "reward_std": 0.03567935898900032, "rewards/check_gptzero_func/mean": 0.9292022585868835, "rewards/check_gptzero_func/std": 0.1510133296251297, "sampling/importance_sampling_ratio/max": 1.455322504043579, "sampling/importance_sampling_ratio/mean": 0.9996463656425476, "sampling/importance_sampling_ratio/min": 0.7036523222923279, "sampling/sampling_logp_difference/max": 0.3752274513244629, "sampling/sampling_logp_difference/mean": 0.023847755044698715, "step": 1465 }, { "clip_ratio/high_max": 0.004324933979660273, "clip_ratio/high_mean": 0.001456396421417594, "clip_ratio/low_mean": 0.0013085345271974802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027649307157844305, "entropy": 1.4473024606704712, "epoch": 10.04109589041096, "grad_norm": 1.2561939617758848, "kl": 1.1123660802841187, "learning_rate": 2.4914383561643835e-07, "loss": 0.0824, "step": 1466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1728.0, "completions/mean_length": 362.1071472167969, "completions/mean_terminated_length": 314.14544677734375, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 1.3296184539794922, "epoch": 10.047945205479452, "frac_reward_zero_std": 0.0, "grad_norm": 1.2873161768787442, "kl": 1.1433889865875244, "learning_rate": 2.48972602739726e-07, "loss": 0.0882, "num_tokens": 15700786.0, "reward": 0.90301114320755, "reward_std": 0.049263548105955124, "rewards/check_gptzero_func/mean": 0.90301114320755, "rewards/check_gptzero_func/std": 0.2059447169303894, "sampling/importance_sampling_ratio/max": 1.3052083253860474, "sampling/importance_sampling_ratio/mean": 1.0000251531600952, "sampling/importance_sampling_ratio/min": 0.14608235657215118, "sampling/sampling_logp_difference/max": 1.9235846996307373, "sampling/sampling_logp_difference/mean": 0.023769626393914223, "step": 1467 }, { "clip_ratio/high_max": 0.004950494971126318, "clip_ratio/high_mean": 0.0022311212960630655, "clip_ratio/low_mean": 0.0011778745101764798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003408996155485511, "entropy": 1.3283988237380981, "epoch": 10.054794520547945, "grad_norm": 1.2409087719160716, "kl": 1.1412209272384644, "learning_rate": 2.488013698630137e-07, "loss": 0.0821, "step": 1468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1996.0, "completions/max_terminated_length": 1996.0, "completions/mean_length": 197.21429443359375, "completions/mean_terminated_length": 197.21429443359375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 1.2444419860839844, "epoch": 10.061643835616438, "frac_reward_zero_std": 0.0, "grad_norm": 1.5481303323397269, "kl": 1.1994901895523071, "learning_rate": 2.4863013698630135e-07, "loss": 0.0496, "num_tokens": 15717022.0, "reward": 0.9507613778114319, "reward_std": 0.025785572826862335, "rewards/check_gptzero_func/mean": 0.9507613182067871, "rewards/check_gptzero_func/std": 0.16259263455867767, "sampling/importance_sampling_ratio/max": 1.2848259210586548, "sampling/importance_sampling_ratio/mean": 1.0005439519882202, "sampling/importance_sampling_ratio/min": 0.7038751244544983, "sampling/sampling_logp_difference/max": 0.3511543273925781, "sampling/sampling_logp_difference/mean": 0.02091163583099842, "step": 1469 }, { "clip_ratio/high_max": 0.006432459224015474, "clip_ratio/high_mean": 0.0016955120954662561, "clip_ratio/low_mean": 0.0013658867683261633, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030613988637924194, "entropy": 1.243361473083496, "epoch": 10.068493150684931, "grad_norm": 1.4664556933061368, "kl": 1.2004059553146362, "learning_rate": 2.4845890410958905e-07, "loss": 0.0428, "step": 1470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 222.3928680419922, "completions/mean_terminated_length": 171.89089965820312, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 1.3860769271850586, "epoch": 10.075342465753424, "frac_reward_zero_std": 0.0, "grad_norm": 1.6284879362776732, "kl": 1.3156005144119263, "learning_rate": 2.482876712328767e-07, "loss": 0.0311, "num_tokens": 15734568.0, "reward": 0.9261552095413208, "reward_std": 0.058070339262485504, "rewards/check_gptzero_func/mean": 0.926155149936676, "rewards/check_gptzero_func/std": 0.18315596878528595, "sampling/importance_sampling_ratio/max": 1.5283920764923096, "sampling/importance_sampling_ratio/mean": 0.9998934864997864, "sampling/importance_sampling_ratio/min": 0.5377895832061768, "sampling/sampling_logp_difference/max": 0.6202878952026367, "sampling/sampling_logp_difference/mean": 0.02173972874879837, "step": 1471 }, { "clip_ratio/high_max": 0.005567928776144981, "clip_ratio/high_mean": 0.0017751246923580766, "clip_ratio/low_mean": 0.001755927805788815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003531052265316248, "entropy": 1.3897502422332764, "epoch": 10.082191780821917, "grad_norm": 1.5440311501669903, "kl": 1.3086313009262085, "learning_rate": 2.4811643835616435e-07, "loss": 0.0234, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1689.0, "completions/max_terminated_length": 1689.0, "completions/mean_length": 202.7678680419922, "completions/mean_terminated_length": 202.7678680419922, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.3479591608047485, "epoch": 10.08904109589041, "frac_reward_zero_std": 0.0, "grad_norm": 1.553380725153179, "kl": 1.1797494888305664, "learning_rate": 2.4794520547945206e-07, "loss": 0.1673, "num_tokens": 15750635.0, "reward": 0.9286389350891113, "reward_std": 0.055341292172670364, "rewards/check_gptzero_func/mean": 0.9286388754844666, "rewards/check_gptzero_func/std": 0.18676871061325073, "sampling/importance_sampling_ratio/max": 1.258453607559204, "sampling/importance_sampling_ratio/mean": 1.0000972747802734, "sampling/importance_sampling_ratio/min": 0.6070002317428589, "sampling/sampling_logp_difference/max": 0.49922609329223633, "sampling/sampling_logp_difference/mean": 0.022330762818455696, "step": 1473 }, { "clip_ratio/high_max": 0.008509540930390358, "clip_ratio/high_mean": 0.0016699974657967687, "clip_ratio/low_mean": 0.0016794848488643765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003349482547491789, "entropy": 1.3464993238449097, "epoch": 10.095890410958905, "grad_norm": 1.459710134640709, "kl": 1.1809282302856445, "learning_rate": 2.477739726027397e-07, "loss": 0.1605, "step": 1474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1843.0, "completions/max_terminated_length": 1843.0, "completions/mean_length": 247.1607208251953, "completions/mean_terminated_length": 247.1607208251953, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.3594039678573608, "epoch": 10.102739726027398, "frac_reward_zero_std": 0.0, "grad_norm": 1.5152922020614559, "kl": 1.2016347646713257, "learning_rate": 2.4760273972602736e-07, "loss": 0.0066, "num_tokens": 15769738.0, "reward": 0.9503734111785889, "reward_std": 0.02446402795612812, "rewards/check_gptzero_func/mean": 0.9503733515739441, "rewards/check_gptzero_func/std": 0.11556139588356018, "sampling/importance_sampling_ratio/max": 1.641624927520752, "sampling/importance_sampling_ratio/mean": 1.000548243522644, "sampling/importance_sampling_ratio/min": 0.7830573916435242, "sampling/sampling_logp_difference/max": 0.49568653106689453, "sampling/sampling_logp_difference/mean": 0.02171093039214611, "step": 1475 }, { "clip_ratio/high_max": 0.0052083334885537624, "clip_ratio/high_mean": 0.001732919947244227, "clip_ratio/low_mean": 0.0008798757917247713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026127956807613373, "entropy": 1.3608695268630981, "epoch": 10.10958904109589, "grad_norm": 1.4475672005173108, "kl": 1.2008116245269775, "learning_rate": 2.4743150684931506e-07, "loss": -0.0007, "step": 1476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1722.0, "completions/max_terminated_length": 1722.0, "completions/mean_length": 260.1071472167969, "completions/mean_terminated_length": 260.1071472167969, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 1.4647653102874756, "epoch": 10.116438356164384, "frac_reward_zero_std": 0.0, "grad_norm": 1.434265263997982, "kl": 1.0179113149642944, "learning_rate": 2.4726027397260277e-07, "loss": 0.0647, "num_tokens": 15789672.0, "reward": 0.9321775436401367, "reward_std": 0.05673481523990631, "rewards/check_gptzero_func/mean": 0.9321774840354919, "rewards/check_gptzero_func/std": 0.17944353818893433, "sampling/importance_sampling_ratio/max": 1.3398257493972778, "sampling/importance_sampling_ratio/mean": 1.000091791152954, "sampling/importance_sampling_ratio/min": 0.6940885186195374, "sampling/sampling_logp_difference/max": 0.3651556968688965, "sampling/sampling_logp_difference/mean": 0.022855741903185844, "step": 1477 }, { "clip_ratio/high_max": 0.004612769931554794, "clip_ratio/high_mean": 0.001797812758013606, "clip_ratio/low_mean": 0.0008740765042603016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026718894951045513, "entropy": 1.46299147605896, "epoch": 10.123287671232877, "grad_norm": 1.386407399905354, "kl": 1.016724944114685, "learning_rate": 2.470890410958904e-07, "loss": 0.0585, "step": 1478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2116.0, "completions/mean_length": 373.1250305175781, "completions/mean_terminated_length": 325.3636169433594, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "entropy": 1.329323649406433, "epoch": 10.13013698630137, "frac_reward_zero_std": 0.0, "grad_norm": 1.271552449004844, "kl": 0.999671459197998, "learning_rate": 2.4691780821917807e-07, "loss": 0.1564, "num_tokens": 15814739.0, "reward": 0.9056710004806519, "reward_std": 0.02270667813718319, "rewards/check_gptzero_func/mean": 0.9056710004806519, "rewards/check_gptzero_func/std": 0.19862601161003113, "sampling/importance_sampling_ratio/max": 1.7300574779510498, "sampling/importance_sampling_ratio/mean": 1.000154733657837, "sampling/importance_sampling_ratio/min": 0.0669625923037529, "sampling/sampling_logp_difference/max": 2.7036211490631104, "sampling/sampling_logp_difference/mean": 0.023252364248037338, "step": 1479 }, { "clip_ratio/high_max": 0.004601025488227606, "clip_ratio/high_mean": 0.0018926113843917847, "clip_ratio/low_mean": 0.0015159405302256346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034085519146174192, "entropy": 1.3320512771606445, "epoch": 10.136986301369863, "grad_norm": 1.2252890493938353, "kl": 0.9960137009620667, "learning_rate": 2.467465753424657e-07, "loss": 0.1504, "step": 1480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1890.0, "completions/max_terminated_length": 1890.0, "completions/mean_length": 287.1785888671875, "completions/mean_terminated_length": 287.1785888671875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.148740530014038, "epoch": 10.143835616438356, "frac_reward_zero_std": 0.0, "grad_norm": 1.4035919242085813, "kl": 1.0819671154022217, "learning_rate": 2.465753424657534e-07, "loss": 0.0175, "num_tokens": 15835799.0, "reward": 0.8742067217826843, "reward_std": 0.07926204055547714, "rewards/check_gptzero_func/mean": 0.8742066621780396, "rewards/check_gptzero_func/std": 0.24899084866046906, "sampling/importance_sampling_ratio/max": 1.3910378217697144, "sampling/importance_sampling_ratio/mean": 0.9992702603340149, "sampling/importance_sampling_ratio/min": 0.6343288421630859, "sampling/sampling_logp_difference/max": 0.4551877975463867, "sampling/sampling_logp_difference/mean": 0.022198980674147606, "step": 1481 }, { "clip_ratio/high_max": 0.006688963156193495, "clip_ratio/high_mean": 0.002555147046223283, "clip_ratio/low_mean": 0.0020231325179338455, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004578279796987772, "entropy": 1.1481802463531494, "epoch": 10.150684931506849, "grad_norm": 1.3323997349428658, "kl": 1.0786421298980713, "learning_rate": 2.4640410958904107e-07, "loss": 0.0109, "step": 1482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1871.0, "completions/max_terminated_length": 1871.0, "completions/mean_length": 363.8035888671875, "completions/mean_terminated_length": 363.8035888671875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.315256953239441, "epoch": 10.157534246575343, "frac_reward_zero_std": 0.0, "grad_norm": 1.3658967235024801, "kl": 1.083672285079956, "learning_rate": 2.462328767123288e-07, "loss": 0.0676, "num_tokens": 15860598.0, "reward": 0.8827147483825684, "reward_std": 0.032830215990543365, "rewards/check_gptzero_func/mean": 0.8827146887779236, "rewards/check_gptzero_func/std": 0.21916410326957703, "sampling/importance_sampling_ratio/max": 1.3863927125930786, "sampling/importance_sampling_ratio/mean": 0.9995408058166504, "sampling/importance_sampling_ratio/min": 0.6540263891220093, "sampling/sampling_logp_difference/max": 0.42460763454437256, "sampling/sampling_logp_difference/mean": 0.024204839020967484, "step": 1483 }, { "clip_ratio/high_max": 0.005564964842051268, "clip_ratio/high_mean": 0.0026548574678599834, "clip_ratio/low_mean": 0.0006184912635944784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032733490224927664, "entropy": 1.3197811841964722, "epoch": 10.164383561643836, "grad_norm": 1.3009570495941734, "kl": 1.086785912513733, "learning_rate": 2.460616438356164e-07, "loss": 0.0608, "step": 1484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 599.0, "completions/max_terminated_length": 599.0, "completions/mean_length": 77.51786041259766, "completions/mean_terminated_length": 77.51786041259766, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.2204477787017822, "epoch": 10.17123287671233, "frac_reward_zero_std": 0.0357142873108387, "grad_norm": 1.857097069177452, "kl": 1.7312564849853516, "learning_rate": 2.458904109589041e-07, "loss": 0.0026, "num_tokens": 15869367.0, "reward": 0.9694768786430359, "reward_std": 0.018468011170625687, "rewards/check_gptzero_func/mean": 0.9694768190383911, "rewards/check_gptzero_func/std": 0.09317558258771896, "sampling/importance_sampling_ratio/max": 1.2823752164840698, "sampling/importance_sampling_ratio/mean": 1.0007505416870117, "sampling/importance_sampling_ratio/min": 0.816590428352356, "sampling/sampling_logp_difference/max": 0.24871397018432617, "sampling/sampling_logp_difference/mean": 0.01921125315129757, "step": 1485 }, { "clip_ratio/high_max": 0.007444168906658888, "clip_ratio/high_mean": 0.0022651117760688066, "clip_ratio/low_mean": 0.0014706390211358666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037357506807893515, "entropy": 1.2195435762405396, "epoch": 10.178082191780822, "grad_norm": 1.7956032284781331, "kl": 1.7164579629898071, "learning_rate": 2.457191780821918e-07, "loss": -0.0052, "step": 1486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 367.76788330078125, "completions/mean_terminated_length": 319.9090881347656, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.2624536752700806, "epoch": 10.184931506849315, "frac_reward_zero_std": 0.0, "grad_norm": 6.487447048291836, "kl": 1.418580174446106, "learning_rate": 2.4554794520547943e-07, "loss": -0.0002, "num_tokens": 15894434.0, "reward": 0.8794018030166626, "reward_std": 0.03914429992437363, "rewards/check_gptzero_func/mean": 0.8794017434120178, "rewards/check_gptzero_func/std": 0.2213873416185379, "sampling/importance_sampling_ratio/max": 1.3565335273742676, "sampling/importance_sampling_ratio/mean": 1.000774621963501, "sampling/importance_sampling_ratio/min": 0.18121056258678436, "sampling/sampling_logp_difference/max": 1.7080955505371094, "sampling/sampling_logp_difference/mean": 0.024652814492583275, "step": 1487 }, { "clip_ratio/high_max": 0.004950494971126318, "clip_ratio/high_mean": 0.0018840290140360594, "clip_ratio/low_mean": 0.0007465625531040132, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00263059139251709, "entropy": 1.267473816871643, "epoch": 10.191780821917808, "grad_norm": 1.2546029231243363, "kl": 1.401123046875, "learning_rate": 2.4537671232876713e-07, "loss": -0.0033, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 141.08929443359375, "completions/mean_terminated_length": 141.08929443359375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.2075093984603882, "epoch": 10.198630136986301, "frac_reward_zero_std": 0.0, "grad_norm": 1.5579971503608956, "kl": 1.5120570659637451, "learning_rate": 2.452054794520548e-07, "loss": 0.1011, "num_tokens": 15907175.0, "reward": 0.9463961720466614, "reward_std": 0.025648338720202446, "rewards/check_gptzero_func/mean": 0.9463961720466614, "rewards/check_gptzero_func/std": 0.18244923651218414, "sampling/importance_sampling_ratio/max": 1.3591947555541992, "sampling/importance_sampling_ratio/mean": 1.0006920099258423, "sampling/importance_sampling_ratio/min": 0.7677637934684753, "sampling/sampling_logp_difference/max": 0.30689239501953125, "sampling/sampling_logp_difference/mean": 0.01918165758252144, "step": 1489 }, { "clip_ratio/high_max": 0.006756756920367479, "clip_ratio/high_mean": 0.0022290043998509645, "clip_ratio/low_mean": 0.0012168764369562268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003445880953222513, "entropy": 1.2057836055755615, "epoch": 10.205479452054794, "grad_norm": 1.4752943353216788, "kl": 1.5135339498519897, "learning_rate": 2.450342465753425e-07, "loss": 0.0946, "step": 1490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1502.0, "completions/max_terminated_length": 1502.0, "completions/mean_length": 208.6428680419922, "completions/mean_terminated_length": 208.6428680419922, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.41387939453125, "epoch": 10.212328767123287, "frac_reward_zero_std": 0.0, "grad_norm": 1.6043755876059107, "kl": 1.2968696355819702, "learning_rate": 2.4486301369863014e-07, "loss": 0.1064, "num_tokens": 15923927.0, "reward": 0.9529133439064026, "reward_std": 0.03882933780550957, "rewards/check_gptzero_func/mean": 0.9529132843017578, "rewards/check_gptzero_func/std": 0.12781739234924316, "sampling/importance_sampling_ratio/max": 1.3365586996078491, "sampling/importance_sampling_ratio/mean": 0.9992666244506836, "sampling/importance_sampling_ratio/min": 0.7542874813079834, "sampling/sampling_logp_difference/max": 0.2900981903076172, "sampling/sampling_logp_difference/mean": 0.021869011223316193, "step": 1491 }, { "clip_ratio/high_max": 0.005528255365788937, "clip_ratio/high_mean": 0.001170252333395183, "clip_ratio/low_mean": 0.0009312696056440473, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021015219390392303, "entropy": 1.4155837297439575, "epoch": 10.219178082191782, "grad_norm": 1.5549536826847654, "kl": 1.2949210405349731, "learning_rate": 2.446917808219178e-07, "loss": 0.0992, "step": 1492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1724.0, "completions/mean_length": 310.58929443359375, "completions/mean_terminated_length": 261.6908874511719, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.50727379322052, "epoch": 10.226027397260275, "frac_reward_zero_std": 0.0, "grad_norm": 1.4136255757226697, "kl": 1.1356045007705688, "learning_rate": 2.445205479452055e-07, "loss": 0.2001, "num_tokens": 15945818.0, "reward": 0.9050707817077637, "reward_std": 0.07189204543828964, "rewards/check_gptzero_func/mean": 0.9050707221031189, "rewards/check_gptzero_func/std": 0.20573629438877106, "sampling/importance_sampling_ratio/max": 1.386669635772705, "sampling/importance_sampling_ratio/mean": 1.0003639459609985, "sampling/importance_sampling_ratio/min": 0.651462972164154, "sampling/sampling_logp_difference/max": 0.42853474617004395, "sampling/sampling_logp_difference/mean": 0.023333260789513588, "step": 1493 }, { "clip_ratio/high_max": 0.00644468329846859, "clip_ratio/high_mean": 0.001211069873534143, "clip_ratio/low_mean": 0.0009049703949131072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021160405594855547, "entropy": 1.50727379322052, "epoch": 10.232876712328768, "grad_norm": 1.335806071839024, "kl": 1.134551763534546, "learning_rate": 2.4434931506849314e-07, "loss": 0.1937, "step": 1494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1945.0, "completions/max_terminated_length": 1945.0, "completions/mean_length": 301.8214416503906, "completions/mean_terminated_length": 301.8214416503906, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.239597201347351, "epoch": 10.23972602739726, "frac_reward_zero_std": 0.0, "grad_norm": 1.4467466193246235, "kl": 0.9413716197013855, "learning_rate": 2.4417808219178084e-07, "loss": 0.0311, "num_tokens": 15967350.0, "reward": 0.9091264009475708, "reward_std": 0.03776266425848007, "rewards/check_gptzero_func/mean": 0.909126341342926, "rewards/check_gptzero_func/std": 0.2015906274318695, "sampling/importance_sampling_ratio/max": 1.736401081085205, "sampling/importance_sampling_ratio/mean": 0.9997535347938538, "sampling/importance_sampling_ratio/min": 0.7444640398025513, "sampling/sampling_logp_difference/max": 0.5518145561218262, "sampling/sampling_logp_difference/mean": 0.021904630586504936, "step": 1495 }, { "clip_ratio/high_max": 0.004517221823334694, "clip_ratio/high_mean": 0.0018569518579170108, "clip_ratio/low_mean": 0.0015181308845058084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003375082742422819, "entropy": 1.238080382347107, "epoch": 10.246575342465754, "grad_norm": 1.3608105183970722, "kl": 0.9401930570602417, "learning_rate": 2.440068493150685e-07, "loss": 0.024, "step": 1496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1876.0, "completions/mean_length": 367.0357360839844, "completions/mean_terminated_length": 319.16363525390625, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 1.4554412364959717, "epoch": 10.253424657534246, "frac_reward_zero_std": 0.0, "grad_norm": 1.370392940190636, "kl": 1.2899121046066284, "learning_rate": 2.4383561643835614e-07, "loss": -0.0533, "num_tokens": 15992434.0, "reward": 0.8615076541900635, "reward_std": 0.047796837985515594, "rewards/check_gptzero_func/mean": 0.8615075945854187, "rewards/check_gptzero_func/std": 0.270092248916626, "sampling/importance_sampling_ratio/max": 1.6281830072402954, "sampling/importance_sampling_ratio/mean": 0.999336302280426, "sampling/importance_sampling_ratio/min": 0.7517813444137573, "sampling/sampling_logp_difference/max": 0.48746466636657715, "sampling/sampling_logp_difference/mean": 0.023876110091805458, "step": 1497 }, { "clip_ratio/high_max": 0.004137930925935507, "clip_ratio/high_mean": 0.0014586951583623886, "clip_ratio/low_mean": 0.0007702149450778961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022289101034402847, "entropy": 1.4554412364959717, "epoch": 10.26027397260274, "grad_norm": 1.320445140200803, "kl": 1.2676165103912354, "learning_rate": 2.436643835616438e-07, "loss": -0.0593, "step": 1498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1969.0, "completions/max_terminated_length": 1969.0, "completions/mean_length": 285.58929443359375, "completions/mean_terminated_length": 285.58929443359375, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "entropy": 1.2212679386138916, "epoch": 10.267123287671232, "frac_reward_zero_std": 0.0, "grad_norm": 1.504230823423776, "kl": 1.0158404111862183, "learning_rate": 2.434931506849315e-07, "loss": -0.0236, "num_tokens": 16013577.0, "reward": 0.9208074808120728, "reward_std": 0.03907223790884018, "rewards/check_gptzero_func/mean": 0.920807421207428, "rewards/check_gptzero_func/std": 0.19700884819030762, "sampling/importance_sampling_ratio/max": 1.4243528842926025, "sampling/importance_sampling_ratio/mean": 1.0005545616149902, "sampling/importance_sampling_ratio/min": 0.7537551522254944, "sampling/sampling_logp_difference/max": 0.353717565536499, "sampling/sampling_logp_difference/mean": 0.02268409915268421, "step": 1499 }, { "clip_ratio/high_max": 0.006301050074398518, "clip_ratio/high_mean": 0.0030628822278231382, "clip_ratio/low_mean": 0.002006967319175601, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005069849546998739, "entropy": 1.222102403640747, "epoch": 10.273972602739725, "grad_norm": 1.6594170113838909, "kl": 1.0128557682037354, "learning_rate": 2.4332191780821915e-07, "loss": -0.0305, "step": 1500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1853.0, "completions/mean_length": 331.71429443359375, "completions/mean_terminated_length": 283.1999816894531, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.4463980197906494, "epoch": 10.280821917808218, "frac_reward_zero_std": 0.0, "grad_norm": 1.3501449494582412, "kl": 1.0149996280670166, "learning_rate": 2.4315068493150685e-07, "loss": 0.1439, "num_tokens": 16036765.0, "reward": 0.9028633832931519, "reward_std": 0.0611400380730629, "rewards/check_gptzero_func/mean": 0.9028633832931519, "rewards/check_gptzero_func/std": 0.1948118656873703, "sampling/importance_sampling_ratio/max": 1.402347207069397, "sampling/importance_sampling_ratio/mean": 1.0002259016036987, "sampling/importance_sampling_ratio/min": 0.4902189075946808, "sampling/sampling_logp_difference/max": 0.7129032611846924, "sampling/sampling_logp_difference/mean": 0.022245915606617928, "step": 1501 }, { "clip_ratio/high_max": 0.005231949966400862, "clip_ratio/high_mean": 0.001670100842602551, "clip_ratio/low_mean": 0.0009548786329105496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026249794755131006, "entropy": 1.4518159627914429, "epoch": 10.287671232876713, "grad_norm": 1.286689186023598, "kl": 1.013107419013977, "learning_rate": 2.429794520547945e-07, "loss": 0.1377, "step": 1502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2361.0, "completions/max_terminated_length": 2361.0, "completions/mean_length": 194.92857360839844, "completions/mean_terminated_length": 194.92857360839844, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 1.3344734907150269, "epoch": 10.294520547945206, "frac_reward_zero_std": 0.0, "grad_norm": 1.5872811358717096, "kl": 1.2428768873214722, "learning_rate": 2.4280821917808215e-07, "loss": 0.1695, "num_tokens": 16052829.0, "reward": 0.9496250748634338, "reward_std": 0.042935051023960114, "rewards/check_gptzero_func/mean": 0.9496250152587891, "rewards/check_gptzero_func/std": 0.18512752652168274, "sampling/importance_sampling_ratio/max": 1.464590072631836, "sampling/importance_sampling_ratio/mean": 1.0004057884216309, "sampling/importance_sampling_ratio/min": 0.7897113561630249, "sampling/sampling_logp_difference/max": 0.38157546520233154, "sampling/sampling_logp_difference/mean": 0.020398132503032684, "step": 1503 }, { "clip_ratio/high_max": 0.005566600244492292, "clip_ratio/high_mean": 0.001576607464812696, "clip_ratio/low_mean": 0.0009301013196818531, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025067089591175318, "entropy": 1.3368855714797974, "epoch": 10.301369863013699, "grad_norm": 1.4485376873829452, "kl": 1.2402280569076538, "learning_rate": 2.4263698630136986e-07, "loss": 0.1635, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1699.0, "completions/mean_length": 310.5714416503906, "completions/mean_terminated_length": 261.6727294921875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1766613721847534, "epoch": 10.308219178082192, "frac_reward_zero_std": 0.0, "grad_norm": 1.358050635427177, "kl": 1.0811960697174072, "learning_rate": 2.424657534246575e-07, "loss": 0.1732, "num_tokens": 16074763.0, "reward": 0.8933875560760498, "reward_std": 0.04176019877195358, "rewards/check_gptzero_func/mean": 0.893387496471405, "rewards/check_gptzero_func/std": 0.23835396766662598, "sampling/importance_sampling_ratio/max": 1.3634458780288696, "sampling/importance_sampling_ratio/mean": 1.0001429319381714, "sampling/importance_sampling_ratio/min": 0.6564818024635315, "sampling/sampling_logp_difference/max": 0.42086029052734375, "sampling/sampling_logp_difference/mean": 0.02253415435552597, "step": 1505 }, { "clip_ratio/high_max": 0.006259027402848005, "clip_ratio/high_mean": 0.0024872678332030773, "clip_ratio/low_mean": 0.002680346369743347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0051676142029464245, "entropy": 1.1760109663009644, "epoch": 10.315068493150685, "grad_norm": 1.292279400576405, "kl": 1.07843816280365, "learning_rate": 2.422945205479452e-07, "loss": 0.167, "step": 1506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2511.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 285.5, "completions/mean_terminated_length": 285.5, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.4096609354019165, "epoch": 10.321917808219178, "frac_reward_zero_std": 0.0, "grad_norm": 1.4029679340709929, "kl": 1.0172725915908813, "learning_rate": 2.4212328767123286e-07, "loss": 0.0986, "num_tokens": 16095693.0, "reward": 0.9254556894302368, "reward_std": 0.021124040707945824, "rewards/check_gptzero_func/mean": 0.925455629825592, "rewards/check_gptzero_func/std": 0.19553041458129883, "sampling/importance_sampling_ratio/max": 1.333879828453064, "sampling/importance_sampling_ratio/mean": 0.999812126159668, "sampling/importance_sampling_ratio/min": 0.7083120346069336, "sampling/sampling_logp_difference/max": 0.34487056732177734, "sampling/sampling_logp_difference/mean": 0.023805590346455574, "step": 1507 }, { "clip_ratio/high_max": 0.00603290693834424, "clip_ratio/high_mean": 0.0021778505761176348, "clip_ratio/low_mean": 0.0008914860663935542, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030693369917571545, "entropy": 1.4151660203933716, "epoch": 10.32876712328767, "grad_norm": 1.3334262561736578, "kl": 1.0130800008773804, "learning_rate": 2.4195205479452056e-07, "loss": 0.0922, "step": 1508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 378.4464416503906, "completions/mean_terminated_length": 230.05661010742188, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.5649278163909912, "epoch": 10.335616438356164, "frac_reward_zero_std": 0.0, "grad_norm": 1.1815582914083738, "kl": 1.120430588722229, "learning_rate": 2.417808219178082e-07, "loss": 0.1561, "num_tokens": 16121780.0, "reward": 0.9352770447731018, "reward_std": 0.048165008425712585, "rewards/check_gptzero_func/mean": 0.935276985168457, "rewards/check_gptzero_func/std": 0.16551251709461212, "sampling/importance_sampling_ratio/max": 1.3793467283248901, "sampling/importance_sampling_ratio/mean": 0.9989529848098755, "sampling/importance_sampling_ratio/min": 0.417135626077652, "sampling/sampling_logp_difference/max": 0.8743438720703125, "sampling/sampling_logp_difference/mean": 0.023513445630669594, "step": 1509 }, { "clip_ratio/high_max": 0.003034901339560747, "clip_ratio/high_mean": 0.0014403770910575986, "clip_ratio/low_mean": 0.0006036693230271339, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020440462976694107, "entropy": 1.5640605688095093, "epoch": 10.342465753424657, "grad_norm": 1.152019358792002, "kl": 1.1176542043685913, "learning_rate": 2.4160958904109586e-07, "loss": 0.1505, "step": 1510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1541.0, "completions/max_terminated_length": 1541.0, "completions/mean_length": 183.44644165039062, "completions/mean_terminated_length": 183.44644165039062, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.2122232913970947, "epoch": 10.349315068493151, "frac_reward_zero_std": 0.0, "grad_norm": 1.7677005030367101, "kl": 1.4506676197052002, "learning_rate": 2.4143835616438357e-07, "loss": -0.0938, "num_tokens": 16136703.0, "reward": 0.9179962873458862, "reward_std": 0.057311996817588806, "rewards/check_gptzero_func/mean": 0.9179962873458862, "rewards/check_gptzero_func/std": 0.21080821752548218, "sampling/importance_sampling_ratio/max": 1.2864829301834106, "sampling/importance_sampling_ratio/mean": 0.9996777772903442, "sampling/importance_sampling_ratio/min": 0.7213551998138428, "sampling/sampling_logp_difference/max": 0.32662367820739746, "sampling/sampling_logp_difference/mean": 0.021181168034672737, "step": 1511 }, { "clip_ratio/high_max": 0.00890715979039669, "clip_ratio/high_mean": 0.0033689255360513926, "clip_ratio/low_mean": 0.001111349556595087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004480275325477123, "entropy": 1.2129935026168823, "epoch": 10.356164383561644, "grad_norm": 1.69876653971886, "kl": 1.4518537521362305, "learning_rate": 2.412671232876712e-07, "loss": -0.1014, "step": 1512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 172.33929443359375, "completions/mean_terminated_length": 172.33929443359375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.4109660387039185, "epoch": 10.363013698630137, "frac_reward_zero_std": 0.0, "grad_norm": 1.7518801336512289, "kl": 1.235483169555664, "learning_rate": 2.410958904109589e-07, "loss": 0.0629, "num_tokens": 16151332.0, "reward": 0.9402182698249817, "reward_std": 0.04996098205447197, "rewards/check_gptzero_func/mean": 0.9402182698249817, "rewards/check_gptzero_func/std": 0.18324527144432068, "sampling/importance_sampling_ratio/max": 1.5680747032165527, "sampling/importance_sampling_ratio/mean": 1.0005872249603271, "sampling/importance_sampling_ratio/min": 0.7621341943740845, "sampling/sampling_logp_difference/max": 0.44984865188598633, "sampling/sampling_logp_difference/mean": 0.022821206599473953, "step": 1513 }, { "clip_ratio/high_max": 0.007845701649785042, "clip_ratio/high_mean": 0.001990449847653508, "clip_ratio/low_mean": 0.0015476681292057037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003538117976859212, "entropy": 1.4146164655685425, "epoch": 10.36986301369863, "grad_norm": 1.6671462924755547, "kl": 1.2359951734542847, "learning_rate": 2.4092465753424657e-07, "loss": 0.055, "step": 1514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 142.7857208251953, "completions/mean_terminated_length": 142.7857208251953, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 1.331158995628357, "epoch": 10.376712328767123, "frac_reward_zero_std": 0.0, "grad_norm": 1.7369363656766141, "kl": 1.3159592151641846, "learning_rate": 2.407534246575342e-07, "loss": 0.1224, "num_tokens": 16163878.0, "reward": 0.9393877983093262, "reward_std": 0.010128721594810486, "rewards/check_gptzero_func/mean": 0.9393877387046814, "rewards/check_gptzero_func/std": 0.20490112900733948, "sampling/importance_sampling_ratio/max": 1.278503656387329, "sampling/importance_sampling_ratio/mean": 1.0001925230026245, "sampling/importance_sampling_ratio/min": 0.7910616993904114, "sampling/sampling_logp_difference/max": 0.24569034576416016, "sampling/sampling_logp_difference/mean": 0.020616471767425537, "step": 1515 }, { "clip_ratio/high_max": 0.00688791461288929, "clip_ratio/high_mean": 0.0015777645166963339, "clip_ratio/low_mean": 0.0024544005282223225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0040321652777493, "entropy": 1.3317595720291138, "epoch": 10.383561643835616, "grad_norm": 1.639617022318775, "kl": 1.3133822679519653, "learning_rate": 2.4058219178082187e-07, "loss": 0.115, "step": 1516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1806.0, "completions/max_terminated_length": 1806.0, "completions/mean_length": 312.1785888671875, "completions/mean_terminated_length": 312.1785888671875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.2703710794448853, "epoch": 10.39041095890411, "frac_reward_zero_std": 0.0, "grad_norm": 1.4107814135395382, "kl": 1.1809369325637817, "learning_rate": 2.404109589041096e-07, "loss": 0.0387, "num_tokens": 16185916.0, "reward": 0.932982325553894, "reward_std": 0.022999772801995277, "rewards/check_gptzero_func/mean": 0.932982325553894, "rewards/check_gptzero_func/std": 0.1536557972431183, "sampling/importance_sampling_ratio/max": 1.408327579498291, "sampling/importance_sampling_ratio/mean": 1.0003470182418823, "sampling/importance_sampling_ratio/min": 0.6803417801856995, "sampling/sampling_logp_difference/max": 0.385159969329834, "sampling/sampling_logp_difference/mean": 0.023378415033221245, "step": 1517 }, { "clip_ratio/high_max": 0.006069802679121494, "clip_ratio/high_mean": 0.0021843011491000652, "clip_ratio/low_mean": 0.0010853090789169073, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032696102280169725, "entropy": 1.2715271711349487, "epoch": 10.397260273972602, "grad_norm": 1.3590572606036344, "kl": 1.18035888671875, "learning_rate": 2.402397260273973e-07, "loss": 0.0321, "step": 1518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1927.0, "completions/mean_length": 372.6607360839844, "completions/mean_terminated_length": 324.8908996582031, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.452317237854004, "epoch": 10.404109589041095, "frac_reward_zero_std": 0.0, "grad_norm": 1.1828760513522814, "kl": 0.8775010108947754, "learning_rate": 2.4006849315068493e-07, "loss": 0.2047, "num_tokens": 16211745.0, "reward": 0.9152243733406067, "reward_std": 0.028617212548851967, "rewards/check_gptzero_func/mean": 0.9152243733406067, "rewards/check_gptzero_func/std": 0.18789498507976532, "sampling/importance_sampling_ratio/max": 1.6921006441116333, "sampling/importance_sampling_ratio/mean": 0.9995920062065125, "sampling/importance_sampling_ratio/min": 0.6973525881767273, "sampling/sampling_logp_difference/max": 0.5259706974029541, "sampling/sampling_logp_difference/mean": 0.02278716303408146, "step": 1519 }, { "clip_ratio/high_max": 0.0033154806587845087, "clip_ratio/high_mean": 0.0010874092113226652, "clip_ratio/low_mean": 0.0014246973441913724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025121066719293594, "entropy": 1.452317237854004, "epoch": 10.41095890410959, "grad_norm": 1.1495940097793753, "kl": 0.8737141489982605, "learning_rate": 2.398972602739726e-07, "loss": 0.1995, "step": 1520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2128.0, "completions/max_terminated_length": 2128.0, "completions/mean_length": 319.21429443359375, "completions/mean_terminated_length": 319.21429443359375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.3319584131240845, "epoch": 10.417808219178083, "frac_reward_zero_std": 0.0, "grad_norm": 1.3834220268462658, "kl": 1.132627248764038, "learning_rate": 2.3972602739726023e-07, "loss": 0.129, "num_tokens": 16234321.0, "reward": 0.9104312658309937, "reward_std": 0.040111374109983444, "rewards/check_gptzero_func/mean": 0.9104312658309937, "rewards/check_gptzero_func/std": 0.17989695072174072, "sampling/importance_sampling_ratio/max": 1.7866065502166748, "sampling/importance_sampling_ratio/mean": 1.0011823177337646, "sampling/importance_sampling_ratio/min": 0.7221609950065613, "sampling/sampling_logp_difference/max": 0.5803179740905762, "sampling/sampling_logp_difference/mean": 0.022977354004979134, "step": 1521 }, { "clip_ratio/high_max": 0.0036613272968679667, "clip_ratio/high_mean": 0.0014103848952800035, "clip_ratio/low_mean": 0.0015436556423082948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002954040886834264, "entropy": 1.334460973739624, "epoch": 10.424657534246576, "grad_norm": 1.3288527019537097, "kl": 1.1343110799789429, "learning_rate": 2.3955479452054793e-07, "loss": 0.123, "step": 1522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1674.0, "completions/mean_length": 327.71429443359375, "completions/mean_terminated_length": 279.12725830078125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.388865351676941, "epoch": 10.431506849315069, "frac_reward_zero_std": 0.0, "grad_norm": 1.438557274461593, "kl": 1.072692632675171, "learning_rate": 2.393835616438356e-07, "loss": 0.1332, "num_tokens": 16257791.0, "reward": 0.9003584980964661, "reward_std": 0.05916375294327736, "rewards/check_gptzero_func/mean": 0.9003585577011108, "rewards/check_gptzero_func/std": 0.21197380125522614, "sampling/importance_sampling_ratio/max": 1.3188921213150024, "sampling/importance_sampling_ratio/mean": 0.9996766448020935, "sampling/importance_sampling_ratio/min": 0.5781853199005127, "sampling/sampling_logp_difference/max": 0.547860860824585, "sampling/sampling_logp_difference/mean": 0.023216376081109047, "step": 1523 }, { "clip_ratio/high_max": 0.003890293650329113, "clip_ratio/high_mean": 0.0014410974690690637, "clip_ratio/low_mean": 0.0017761343624442816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032172314822673798, "entropy": 1.3909614086151123, "epoch": 10.438356164383562, "grad_norm": 1.3831378900082025, "kl": 1.0692583322525024, "learning_rate": 2.392123287671233e-07, "loss": 0.1266, "step": 1524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1865.0, "completions/mean_length": 307.6071472167969, "completions/mean_terminated_length": 258.654541015625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.5891033411026, "epoch": 10.445205479452055, "frac_reward_zero_std": 0.0, "grad_norm": 1.3185866952191483, "kl": 0.9570764899253845, "learning_rate": 2.3904109589041094e-07, "loss": 0.2332, "num_tokens": 16280001.0, "reward": 0.9111788868904114, "reward_std": 0.08156944811344147, "rewards/check_gptzero_func/mean": 0.9111788868904114, "rewards/check_gptzero_func/std": 0.2244017869234085, "sampling/importance_sampling_ratio/max": 1.4623078107833862, "sampling/importance_sampling_ratio/mean": 0.9996331334114075, "sampling/importance_sampling_ratio/min": 0.7023858428001404, "sampling/sampling_logp_difference/max": 0.38001585006713867, "sampling/sampling_logp_difference/mean": 0.024647420272231102, "step": 1525 }, { "clip_ratio/high_max": 0.003196347039192915, "clip_ratio/high_mean": 0.000994838890619576, "clip_ratio/low_mean": 0.0017681177705526352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002762956777587533, "entropy": 1.592063307762146, "epoch": 10.452054794520548, "grad_norm": 1.2806470667306216, "kl": 0.9588320851325989, "learning_rate": 2.3886986301369864e-07, "loss": 0.2275, "step": 1526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2342.0, "completions/max_terminated_length": 2342.0, "completions/mean_length": 285.96429443359375, "completions/mean_terminated_length": 285.96429443359375, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "entropy": 1.395887017250061, "epoch": 10.45890410958904, "frac_reward_zero_std": 0.0, "grad_norm": 1.4521380237945491, "kl": 1.145153284072876, "learning_rate": 2.386986301369863e-07, "loss": -0.0299, "num_tokens": 16300873.0, "reward": 0.9316893815994263, "reward_std": 0.039257630705833435, "rewards/check_gptzero_func/mean": 0.9316893219947815, "rewards/check_gptzero_func/std": 0.15034157037734985, "sampling/importance_sampling_ratio/max": 1.459050178527832, "sampling/importance_sampling_ratio/mean": 0.9998384714126587, "sampling/importance_sampling_ratio/min": 0.6181440353393555, "sampling/sampling_logp_difference/max": 0.4810338020324707, "sampling/sampling_logp_difference/mean": 0.02229960449039936, "step": 1527 }, { "clip_ratio/high_max": 0.006185838486999273, "clip_ratio/high_mean": 0.0011985533637925982, "clip_ratio/low_mean": 0.0010745825711637735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022731360513716936, "entropy": 1.3935930728912354, "epoch": 10.465753424657533, "grad_norm": 1.3981841021317922, "kl": 1.1436413526535034, "learning_rate": 2.3852739726027394e-07, "loss": -0.0362, "step": 1528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 207.35714721679688, "completions/mean_terminated_length": 156.58181762695312, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.5196802616119385, "epoch": 10.472602739726028, "frac_reward_zero_std": 0.0, "grad_norm": 1.398255786589724, "kl": 1.0991359949111938, "learning_rate": 2.3835616438356162e-07, "loss": 0.3403, "num_tokens": 16317563.0, "reward": 0.9685444831848145, "reward_std": 0.031403712928295135, "rewards/check_gptzero_func/mean": 0.9685444235801697, "rewards/check_gptzero_func/std": 0.09982339292764664, "sampling/importance_sampling_ratio/max": 1.3110542297363281, "sampling/importance_sampling_ratio/mean": 0.9999268651008606, "sampling/importance_sampling_ratio/min": 0.7737516760826111, "sampling/sampling_logp_difference/max": 0.2708315849304199, "sampling/sampling_logp_difference/mean": 0.022590043023228645, "step": 1529 }, { "clip_ratio/high_max": 0.002751031657680869, "clip_ratio/high_mean": 0.0005570706562139094, "clip_ratio/low_mean": 0.0015705280238762498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00212759873829782, "entropy": 1.5207418203353882, "epoch": 10.479452054794521, "grad_norm": 1.3620022499461915, "kl": 1.094645619392395, "learning_rate": 2.3818493150684932e-07, "loss": 0.3344, "step": 1530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2322.0, "completions/max_terminated_length": 2322.0, "completions/mean_length": 164.75, "completions/mean_terminated_length": 164.75, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 1.3443257808685303, "epoch": 10.486301369863014, "frac_reward_zero_std": 0.0, "grad_norm": 1.8120558994187537, "kl": 1.3296724557876587, "learning_rate": 2.3801369863013697e-07, "loss": 0.0763, "num_tokens": 16331761.0, "reward": 0.9604876637458801, "reward_std": 0.0439610630273819, "rewards/check_gptzero_func/mean": 0.9604876637458801, "rewards/check_gptzero_func/std": 0.15165390074253082, "sampling/importance_sampling_ratio/max": 1.4985086917877197, "sampling/importance_sampling_ratio/mean": 1.0000441074371338, "sampling/importance_sampling_ratio/min": 0.6886193156242371, "sampling/sampling_logp_difference/max": 0.40447044372558594, "sampling/sampling_logp_difference/mean": 0.021449023857712746, "step": 1531 }, { "clip_ratio/high_max": 0.008469193242490292, "clip_ratio/high_mean": 0.00174131675157696, "clip_ratio/low_mean": 0.002503545256331563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004244861658662558, "entropy": 1.3443257808685303, "epoch": 10.493150684931507, "grad_norm": 1.64029796263853, "kl": 1.3300788402557373, "learning_rate": 2.3784246575342465e-07, "loss": 0.0692, "step": 1532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2825.0, "completions/max_terminated_length": 2825.0, "completions/mean_length": 224.0535888671875, "completions/mean_terminated_length": 224.0535888671875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.6796891689300537, "epoch": 10.5, "frac_reward_zero_std": 0.0, "grad_norm": 1.4134062666416085, "kl": 1.113013505935669, "learning_rate": 2.3767123287671233e-07, "loss": 0.2308, "num_tokens": 16349066.0, "reward": 0.9690600037574768, "reward_std": 0.03303011134266853, "rewards/check_gptzero_func/mean": 0.969059944152832, "rewards/check_gptzero_func/std": 0.08872278779745102, "sampling/importance_sampling_ratio/max": 1.2852014303207397, "sampling/importance_sampling_ratio/mean": 0.9994390606880188, "sampling/importance_sampling_ratio/min": 0.7646075487136841, "sampling/sampling_logp_difference/max": 0.26839256286621094, "sampling/sampling_logp_difference/mean": 0.023285796865820885, "step": 1533 }, { "clip_ratio/high_max": 0.004562303889542818, "clip_ratio/high_mean": 0.0010243679862469435, "clip_ratio/low_mean": 0.0011313118739053607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021556797437369823, "entropy": 1.6807632446289062, "epoch": 10.506849315068493, "grad_norm": 1.3679626380978185, "kl": 1.10551917552948, "learning_rate": 2.3749999999999998e-07, "loss": 0.2249, "step": 1534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2467.0, "completions/max_terminated_length": 2467.0, "completions/mean_length": 334.6607360839844, "completions/mean_terminated_length": 334.6607360839844, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 1.5138986110687256, "epoch": 10.513698630136986, "frac_reward_zero_std": 0.0, "grad_norm": 1.255466876090323, "kl": 0.9100238084793091, "learning_rate": 2.3732876712328768e-07, "loss": 0.0927, "num_tokens": 16373041.0, "reward": 0.9233653545379639, "reward_std": 0.02808878757059574, "rewards/check_gptzero_func/mean": 0.9233652949333191, "rewards/check_gptzero_func/std": 0.15971176326274872, "sampling/importance_sampling_ratio/max": 1.8438249826431274, "sampling/importance_sampling_ratio/mean": 0.9997782111167908, "sampling/importance_sampling_ratio/min": 0.6739583015441895, "sampling/sampling_logp_difference/max": 0.6118422746658325, "sampling/sampling_logp_difference/mean": 0.025174425914883614, "step": 1535 }, { "clip_ratio/high_max": 0.0030775994528084993, "clip_ratio/high_mean": 0.001317524933256209, "clip_ratio/low_mean": 0.0007966504199430346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021141753531992435, "entropy": 1.5138986110687256, "epoch": 10.520547945205479, "grad_norm": 1.217120426426696, "kl": 0.9088236689567566, "learning_rate": 2.3715753424657533e-07, "loss": 0.0871, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 301.4464416503906, "completions/mean_terminated_length": 252.38180541992188, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.647826075553894, "epoch": 10.527397260273972, "frac_reward_zero_std": 0.0, "grad_norm": 1.3839311658860385, "kl": 0.8992571234703064, "learning_rate": 2.36986301369863e-07, "loss": 0.234, "num_tokens": 16394470.0, "reward": 0.9280802607536316, "reward_std": 0.0304707158356905, "rewards/check_gptzero_func/mean": 0.9280802011489868, "rewards/check_gptzero_func/std": 0.17248833179473877, "sampling/importance_sampling_ratio/max": 1.5218195915222168, "sampling/importance_sampling_ratio/mean": 1.0001039505004883, "sampling/importance_sampling_ratio/min": 0.7025130987167358, "sampling/sampling_logp_difference/max": 0.41990673542022705, "sampling/sampling_logp_difference/mean": 0.024700136855244637, "step": 1537 }, { "clip_ratio/high_max": 0.00456769997254014, "clip_ratio/high_mean": 0.001466618268750608, "clip_ratio/low_mean": 0.0018051333026960492, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032717513386160135, "entropy": 1.6448150873184204, "epoch": 10.534246575342467, "grad_norm": 1.3258017630390748, "kl": 0.8997237086296082, "learning_rate": 2.3681506849315066e-07, "loss": 0.2279, "step": 1538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2409.0, "completions/max_terminated_length": 2409.0, "completions/mean_length": 296.6071472167969, "completions/mean_terminated_length": 296.6071472167969, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.4568290710449219, "epoch": 10.54109589041096, "frac_reward_zero_std": 0.0, "grad_norm": 1.405136425429624, "kl": 1.1953147649765015, "learning_rate": 2.3664383561643836e-07, "loss": 0.1487, "num_tokens": 16416374.0, "reward": 0.9194772243499756, "reward_std": 0.06717827916145325, "rewards/check_gptzero_func/mean": 0.9194771647453308, "rewards/check_gptzero_func/std": 0.21170756220817566, "sampling/importance_sampling_ratio/max": 1.469068169593811, "sampling/importance_sampling_ratio/mean": 1.0003873109817505, "sampling/importance_sampling_ratio/min": 0.645635187625885, "sampling/sampling_logp_difference/max": 0.43752074241638184, "sampling/sampling_logp_difference/mean": 0.02430013008415699, "step": 1539 }, { "clip_ratio/high_max": 0.00576326297596097, "clip_ratio/high_mean": 0.0021010448690503836, "clip_ratio/low_mean": 0.0021394158247858286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004240460693836212, "entropy": 1.455865502357483, "epoch": 10.547945205479452, "grad_norm": 1.3264021874615426, "kl": 1.1944681406021118, "learning_rate": 2.36472602739726e-07, "loss": 0.1429, "step": 1540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2480.0, "completions/max_terminated_length": 2480.0, "completions/mean_length": 246.9107208251953, "completions/mean_terminated_length": 246.9107208251953, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.3091294765472412, "epoch": 10.554794520547945, "frac_reward_zero_std": 0.0, "grad_norm": 1.5000880695745225, "kl": 1.2169350385665894, "learning_rate": 2.363013698630137e-07, "loss": 0.0434, "num_tokens": 16434941.0, "reward": 0.9054099917411804, "reward_std": 0.07356756925582886, "rewards/check_gptzero_func/mean": 0.9054099917411804, "rewards/check_gptzero_func/std": 0.23439311981201172, "sampling/importance_sampling_ratio/max": 1.3625379800796509, "sampling/importance_sampling_ratio/mean": 0.999420166015625, "sampling/importance_sampling_ratio/min": 0.7618783712387085, "sampling/sampling_logp_difference/max": 0.30934906005859375, "sampling/sampling_logp_difference/mean": 0.022673552855849266, "step": 1541 }, { "clip_ratio/high_max": 0.003197953337803483, "clip_ratio/high_mean": 0.002187196398153901, "clip_ratio/low_mean": 0.0013887349050492048, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035759310703724623, "entropy": 1.3116885423660278, "epoch": 10.561643835616438, "grad_norm": 1.4525984371492018, "kl": 1.2173806428909302, "learning_rate": 2.3613013698630136e-07, "loss": 0.037, "step": 1542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 269.64288330078125, "completions/mean_terminated_length": 220.0, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.4428247213363647, "epoch": 10.568493150684931, "frac_reward_zero_std": 0.0, "grad_norm": 1.3934370294641707, "kl": 1.0958690643310547, "learning_rate": 2.3595890410958901e-07, "loss": 0.2342, "num_tokens": 16454699.0, "reward": 0.9518868923187256, "reward_std": 0.028867680579423904, "rewards/check_gptzero_func/mean": 0.9518868327140808, "rewards/check_gptzero_func/std": 0.12842273712158203, "sampling/importance_sampling_ratio/max": 1.4558277130126953, "sampling/importance_sampling_ratio/mean": 0.9997422099113464, "sampling/importance_sampling_ratio/min": 0.6414197087287903, "sampling/sampling_logp_difference/max": 0.44407129287719727, "sampling/sampling_logp_difference/mean": 0.02367815002799034, "step": 1543 }, { "clip_ratio/high_max": 0.008123249746859074, "clip_ratio/high_mean": 0.0020452814642339945, "clip_ratio/low_mean": 0.0022548181004822254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004300099331885576, "entropy": 1.4376561641693115, "epoch": 10.575342465753424, "grad_norm": 1.3115363157079558, "kl": 1.0967317819595337, "learning_rate": 2.3578767123287672e-07, "loss": 0.2282, "step": 1544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 325.0, "completions/mean_terminated_length": 325.0, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.3819801807403564, "epoch": 10.582191780821917, "frac_reward_zero_std": 0.0, "grad_norm": 1.4335724983694251, "kl": 1.1495333909988403, "learning_rate": 2.3561643835616437e-07, "loss": 0.1279, "num_tokens": 16477963.0, "reward": 0.9314751625061035, "reward_std": 0.03782041370868683, "rewards/check_gptzero_func/mean": 0.9314751029014587, "rewards/check_gptzero_func/std": 0.1543624848127365, "sampling/importance_sampling_ratio/max": 1.3087457418441772, "sampling/importance_sampling_ratio/mean": 1.0000674724578857, "sampling/importance_sampling_ratio/min": 0.6974503993988037, "sampling/sampling_logp_difference/max": 0.3603239059448242, "sampling/sampling_logp_difference/mean": 0.023153377696871758, "step": 1545 }, { "clip_ratio/high_max": 0.0052526262588799, "clip_ratio/high_mean": 0.0020430830772966146, "clip_ratio/low_mean": 0.0011779379565268755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003221021266654134, "entropy": 1.3839566707611084, "epoch": 10.58904109589041, "grad_norm": 1.294226039535071, "kl": 1.1329485177993774, "learning_rate": 2.3544520547945205e-07, "loss": 0.1219, "step": 1546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1621.0, "completions/max_terminated_length": 1621.0, "completions/mean_length": 226.9285888671875, "completions/mean_terminated_length": 226.9285888671875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.4682161808013916, "epoch": 10.595890410958905, "frac_reward_zero_std": 0.0, "grad_norm": 1.573818965466119, "kl": 1.1493538618087769, "learning_rate": 2.352739726027397e-07, "loss": 0.0674, "num_tokens": 16495625.0, "reward": 0.9190442562103271, "reward_std": 0.04470725357532501, "rewards/check_gptzero_func/mean": 0.9190441966056824, "rewards/check_gptzero_func/std": 0.21347874402999878, "sampling/importance_sampling_ratio/max": 1.5363301038742065, "sampling/importance_sampling_ratio/mean": 0.9993637800216675, "sampling/importance_sampling_ratio/min": 0.7006677389144897, "sampling/sampling_logp_difference/max": 0.42939651012420654, "sampling/sampling_logp_difference/mean": 0.023660294711589813, "step": 1547 }, { "clip_ratio/high_max": 0.00580329867079854, "clip_ratio/high_mean": 0.002309985924512148, "clip_ratio/low_mean": 0.0011161741567775607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034261601977050304, "entropy": 1.4682161808013916, "epoch": 10.602739726027398, "grad_norm": 1.492986171684636, "kl": 1.1499541997909546, "learning_rate": 2.351027397260274e-07, "loss": 0.0608, "step": 1548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2033.0, "completions/max_terminated_length": 2033.0, "completions/mean_length": 253.0357208251953, "completions/mean_terminated_length": 253.0357208251953, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.6387830972671509, "epoch": 10.60958904109589, "frac_reward_zero_std": 0.0, "grad_norm": 1.5481144905495627, "kl": 1.2975279092788696, "learning_rate": 2.3493150684931508e-07, "loss": 0.1198, "num_tokens": 16514319.0, "reward": 0.9489531517028809, "reward_std": 0.04240580275654793, "rewards/check_gptzero_func/mean": 0.9489530920982361, "rewards/check_gptzero_func/std": 0.1300528347492218, "sampling/importance_sampling_ratio/max": 1.298487901687622, "sampling/importance_sampling_ratio/mean": 0.999626636505127, "sampling/importance_sampling_ratio/min": 0.7110212445259094, "sampling/sampling_logp_difference/max": 0.3410530090332031, "sampling/sampling_logp_difference/mean": 0.023963283747434616, "step": 1549 }, { "clip_ratio/high_max": 0.005399730056524277, "clip_ratio/high_mean": 0.0014125487068668008, "clip_ratio/low_mean": 0.0006814917433075607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020940403919667006, "entropy": 1.64000403881073, "epoch": 10.616438356164384, "grad_norm": 1.5100510202330522, "kl": 1.293329119682312, "learning_rate": 2.3476027397260273e-07, "loss": 0.113, "step": 1550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2032.0, "completions/max_terminated_length": 2032.0, "completions/mean_length": 243.8928680419922, "completions/mean_terminated_length": 243.8928680419922, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.588305115699768, "epoch": 10.623287671232877, "frac_reward_zero_std": 0.0, "grad_norm": 1.5347799591476734, "kl": 0.9975067377090454, "learning_rate": 2.345890410958904e-07, "loss": 0.1093, "num_tokens": 16532989.0, "reward": 0.9094496369361877, "reward_std": 0.05533214285969734, "rewards/check_gptzero_func/mean": 0.909449577331543, "rewards/check_gptzero_func/std": 0.22373810410499573, "sampling/importance_sampling_ratio/max": 1.3705600500106812, "sampling/importance_sampling_ratio/mean": 0.9997380971908569, "sampling/importance_sampling_ratio/min": 0.6987082362174988, "sampling/sampling_logp_difference/max": 0.3585219383239746, "sampling/sampling_logp_difference/mean": 0.02338532544672489, "step": 1551 }, { "clip_ratio/high_max": 0.005246350541710854, "clip_ratio/high_mean": 0.0020888159051537514, "clip_ratio/low_mean": 0.0015537863364443183, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036426023580133915, "entropy": 1.5849353075027466, "epoch": 10.63013698630137, "grad_norm": 1.4808441073215934, "kl": 0.9932082295417786, "learning_rate": 2.3441780821917805e-07, "loss": 0.1027, "step": 1552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1863.0, "completions/mean_length": 349.4285888671875, "completions/mean_terminated_length": 301.2363586425781, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 1.5600273609161377, "epoch": 10.636986301369863, "frac_reward_zero_std": 0.0, "grad_norm": 1.3184522341973373, "kl": 1.0139896869659424, "learning_rate": 2.3424657534246576e-07, "loss": 0.1307, "num_tokens": 16557529.0, "reward": 0.9126542210578918, "reward_std": 0.03581813722848892, "rewards/check_gptzero_func/mean": 0.9126542210578918, "rewards/check_gptzero_func/std": 0.19979199767112732, "sampling/importance_sampling_ratio/max": 1.3468854427337646, "sampling/importance_sampling_ratio/mean": 0.9997562766075134, "sampling/importance_sampling_ratio/min": 0.7228543758392334, "sampling/sampling_logp_difference/max": 0.32454752922058105, "sampling/sampling_logp_difference/mean": 0.024170825257897377, "step": 1553 }, { "clip_ratio/high_max": 0.004869251511991024, "clip_ratio/high_mean": 0.001961345784366131, "clip_ratio/low_mean": 0.0010023608338087797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002963706851005554, "entropy": 1.5554559230804443, "epoch": 10.643835616438356, "grad_norm": 1.2647538120216055, "kl": 1.0132066011428833, "learning_rate": 2.340753424657534e-07, "loss": 0.1248, "step": 1554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2044.0, "completions/max_terminated_length": 2044.0, "completions/mean_length": 190.73214721679688, "completions/mean_terminated_length": 190.73214721679688, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 1.2585468292236328, "epoch": 10.650684931506849, "frac_reward_zero_std": 0.0, "grad_norm": 1.630305129782961, "kl": 1.4177682399749756, "learning_rate": 2.3390410958904108e-07, "loss": 0.0547, "num_tokens": 16573446.0, "reward": 0.9559586644172668, "reward_std": 0.01831154339015484, "rewards/check_gptzero_func/mean": 0.9559586644172668, "rewards/check_gptzero_func/std": 0.12537667155265808, "sampling/importance_sampling_ratio/max": 1.2654690742492676, "sampling/importance_sampling_ratio/mean": 0.9999004006385803, "sampling/importance_sampling_ratio/min": 0.6761301159858704, "sampling/sampling_logp_difference/max": 0.3913698196411133, "sampling/sampling_logp_difference/mean": 0.020456520840525627, "step": 1555 }, { "clip_ratio/high_max": 0.006666666828095913, "clip_ratio/high_mean": 0.0023450052831321955, "clip_ratio/low_mean": 0.0010240571573376656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033690626733005047, "entropy": 1.2594261169433594, "epoch": 10.657534246575342, "grad_norm": 1.5356292214930471, "kl": 1.4009885787963867, "learning_rate": 2.3373287671232873e-07, "loss": 0.0477, "step": 1556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1861.0, "completions/max_terminated_length": 1861.0, "completions/mean_length": 197.19644165039062, "completions/mean_terminated_length": 197.19644165039062, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.3391252756118774, "epoch": 10.664383561643836, "frac_reward_zero_std": 0.0, "grad_norm": 1.6083832677629635, "kl": 1.4256826639175415, "learning_rate": 2.3356164383561644e-07, "loss": 0.069, "num_tokens": 16589605.0, "reward": 0.9643435478210449, "reward_std": 0.010924425907433033, "rewards/check_gptzero_func/mean": 0.9643434882164001, "rewards/check_gptzero_func/std": 0.1125815287232399, "sampling/importance_sampling_ratio/max": 1.2879327535629272, "sampling/importance_sampling_ratio/mean": 1.0002979040145874, "sampling/importance_sampling_ratio/min": 0.5547105669975281, "sampling/sampling_logp_difference/max": 0.5893087387084961, "sampling/sampling_logp_difference/mean": 0.02188294753432274, "step": 1557 }, { "clip_ratio/high_max": 0.005032563582062721, "clip_ratio/high_mean": 0.0016195758944377303, "clip_ratio/low_mean": 0.002529361518099904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0041489372961223125, "entropy": 1.3391252756118774, "epoch": 10.67123287671233, "grad_norm": 1.519402754263783, "kl": 1.4210052490234375, "learning_rate": 2.3339041095890411e-07, "loss": 0.0621, "step": 1558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 198.94644165039062, "completions/mean_terminated_length": 198.94644165039062, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.46090567111969, "epoch": 10.678082191780822, "frac_reward_zero_std": 0.0, "grad_norm": 1.6473048366671108, "kl": 1.3428289890289307, "learning_rate": 2.3321917808219177e-07, "loss": 0.0926, "num_tokens": 16605796.0, "reward": 0.9553982615470886, "reward_std": 0.018398134037852287, "rewards/check_gptzero_func/mean": 0.9553982019424438, "rewards/check_gptzero_func/std": 0.12977157533168793, "sampling/importance_sampling_ratio/max": 1.3109891414642334, "sampling/importance_sampling_ratio/mean": 1.0002477169036865, "sampling/importance_sampling_ratio/min": 0.722199559211731, "sampling/sampling_logp_difference/max": 0.3254537582397461, "sampling/sampling_logp_difference/mean": 0.023901965469121933, "step": 1559 }, { "clip_ratio/high_max": 0.004611564334481955, "clip_ratio/high_mean": 0.0016115086618810892, "clip_ratio/low_mean": 0.00124169304035604, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002853201702237129, "entropy": 1.4628328084945679, "epoch": 10.684931506849315, "grad_norm": 1.5732761817144585, "kl": 1.3428289890289307, "learning_rate": 2.3304794520547944e-07, "loss": 0.0859, "step": 1560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1743.0, "completions/max_terminated_length": 1743.0, "completions/mean_length": 270.125, "completions/mean_terminated_length": 270.125, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 1.402773141860962, "epoch": 10.691780821917808, "frac_reward_zero_std": 0.0, "grad_norm": 1.440686914878897, "kl": 1.0879091024398804, "learning_rate": 2.328767123287671e-07, "loss": 0.0963, "num_tokens": 16626777.0, "reward": 0.9067734479904175, "reward_std": 0.07235994935035706, "rewards/check_gptzero_func/mean": 0.9067734479904175, "rewards/check_gptzero_func/std": 0.24331901967525482, "sampling/importance_sampling_ratio/max": 1.2923449277877808, "sampling/importance_sampling_ratio/mean": 1.0003938674926758, "sampling/importance_sampling_ratio/min": 0.5193583369255066, "sampling/sampling_logp_difference/max": 0.6551612615585327, "sampling/sampling_logp_difference/mean": 0.023699278011918068, "step": 1561 }, { "clip_ratio/high_max": 0.0045652627013623714, "clip_ratio/high_mean": 0.0013520222855731845, "clip_ratio/low_mean": 0.0011472160695120692, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00249923812225461, "entropy": 1.4050532579421997, "epoch": 10.698630136986301, "grad_norm": 1.4655435047478134, "kl": 1.084760069847107, "learning_rate": 2.327054794520548e-07, "loss": 0.09, "step": 1562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142873108387, "completions/max_length": 3000.0, "completions/max_terminated_length": 1632.0, "completions/mean_length": 328.5535888671875, "completions/mean_terminated_length": 229.61111450195312, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 1.2425785064697266, "epoch": 10.705479452054794, "frac_reward_zero_std": 0.0, "grad_norm": 1.2011901817725406, "kl": 1.2275694608688354, "learning_rate": 2.3253424657534245e-07, "loss": -0.0111, "num_tokens": 16650076.0, "reward": 0.9072740077972412, "reward_std": 0.05415337532758713, "rewards/check_gptzero_func/mean": 0.9072739481925964, "rewards/check_gptzero_func/std": 0.18894590437412262, "sampling/importance_sampling_ratio/max": 1.4255576133728027, "sampling/importance_sampling_ratio/mean": 0.999674916267395, "sampling/importance_sampling_ratio/min": 0.7023724317550659, "sampling/sampling_logp_difference/max": 0.35456299781799316, "sampling/sampling_logp_difference/mean": 0.021501533687114716, "step": 1563 }, { "clip_ratio/high_max": 0.005539313890039921, "clip_ratio/high_mean": 0.002314361510798335, "clip_ratio/low_mean": 0.001082823844626546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003397185355424881, "entropy": 1.2425785064697266, "epoch": 10.712328767123287, "grad_norm": 1.1403700059157404, "kl": 1.223527193069458, "learning_rate": 2.3236301369863012e-07, "loss": -0.0166, "step": 1564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 150.5178680419922, "completions/mean_terminated_length": 150.5178680419922, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.5071829557418823, "epoch": 10.719178082191782, "frac_reward_zero_std": 0.0, "grad_norm": 1.6117260174000634, "kl": 1.4249727725982666, "learning_rate": 2.321917808219178e-07, "loss": 0.1294, "num_tokens": 16663317.0, "reward": 0.9648538827896118, "reward_std": 0.024504275992512703, "rewards/check_gptzero_func/mean": 0.964853823184967, "rewards/check_gptzero_func/std": 0.10804542154073715, "sampling/importance_sampling_ratio/max": 1.2654356956481934, "sampling/importance_sampling_ratio/mean": 0.9999142289161682, "sampling/importance_sampling_ratio/min": 0.7863004803657532, "sampling/sampling_logp_difference/max": 0.2404162883758545, "sampling/sampling_logp_difference/mean": 0.02190587855875492, "step": 1565 }, { "clip_ratio/high_max": 0.0015847861068323255, "clip_ratio/high_mean": 0.0007569785811938345, "clip_ratio/low_mean": 0.00166989304125309, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002426871797069907, "entropy": 1.5104745626449585, "epoch": 10.726027397260275, "grad_norm": 1.5481859010192984, "kl": 1.4123382568359375, "learning_rate": 2.3202054794520548e-07, "loss": 0.1231, "step": 1566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1961.0, "completions/max_terminated_length": 1961.0, "completions/mean_length": 289.2321472167969, "completions/mean_terminated_length": 289.2321472167969, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.410652995109558, "epoch": 10.732876712328768, "frac_reward_zero_std": 0.0, "grad_norm": 1.4717910280918598, "kl": 1.2277992963790894, "learning_rate": 2.3184931506849315e-07, "loss": 0.1065, "num_tokens": 16683964.0, "reward": 0.9240017533302307, "reward_std": 0.03581565245985985, "rewards/check_gptzero_func/mean": 0.9240016341209412, "rewards/check_gptzero_func/std": 0.17002573609352112, "sampling/importance_sampling_ratio/max": 1.3868790864944458, "sampling/importance_sampling_ratio/mean": 0.9998200535774231, "sampling/importance_sampling_ratio/min": 0.6832703948020935, "sampling/sampling_logp_difference/max": 0.38086462020874023, "sampling/sampling_logp_difference/mean": 0.023020396009087563, "step": 1567 }, { "clip_ratio/high_max": 0.0049242423847317696, "clip_ratio/high_mean": 0.0017109110485762358, "clip_ratio/low_mean": 0.0005896711954846978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023005823604762554, "entropy": 1.4120852947235107, "epoch": 10.73972602739726, "grad_norm": 1.4388632145522948, "kl": 1.224408507347107, "learning_rate": 2.316780821917808e-07, "loss": 0.1002, "step": 1568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1873.0, "completions/mean_length": 441.2500305175781, "completions/mean_terminated_length": 394.7272644042969, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 1.4927793741226196, "epoch": 10.746575342465754, "frac_reward_zero_std": 0.0, "grad_norm": 1.2398806186511222, "kl": 0.9009843468666077, "learning_rate": 2.3150684931506848e-07, "loss": -0.0538, "num_tokens": 16713816.0, "reward": 0.8900342583656311, "reward_std": 0.05477515608072281, "rewards/check_gptzero_func/mean": 0.8900341987609863, "rewards/check_gptzero_func/std": 0.19602926075458527, "sampling/importance_sampling_ratio/max": 1.5592278242111206, "sampling/importance_sampling_ratio/mean": 1.000590443611145, "sampling/importance_sampling_ratio/min": 0.6057144403457642, "sampling/sampling_logp_difference/max": 0.5013465881347656, "sampling/sampling_logp_difference/mean": 0.025713805109262466, "step": 1569 }, { "clip_ratio/high_max": 0.003782250452786684, "clip_ratio/high_mean": 0.001735039404593408, "clip_ratio/low_mean": 0.001188067952170968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002923107473179698, "entropy": 1.4892016649246216, "epoch": 10.753424657534246, "grad_norm": 1.1945442063692666, "kl": 0.9001776576042175, "learning_rate": 2.3133561643835613e-07, "loss": -0.0593, "step": 1570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2178.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 289.39288330078125, "completions/mean_terminated_length": 289.39288330078125, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 1.3451358079910278, "epoch": 10.76027397260274, "frac_reward_zero_std": 0.0, "grad_norm": 1.4341673956775556, "kl": 1.211557149887085, "learning_rate": 2.3116438356164383e-07, "loss": 0.0539, "num_tokens": 16735270.0, "reward": 0.9176637530326843, "reward_std": 0.05652511492371559, "rewards/check_gptzero_func/mean": 0.9176636934280396, "rewards/check_gptzero_func/std": 0.19187770783901215, "sampling/importance_sampling_ratio/max": 2.0, "sampling/importance_sampling_ratio/mean": 0.9998003840446472, "sampling/importance_sampling_ratio/min": 0.6884734034538269, "sampling/sampling_logp_difference/max": 0.7839535474777222, "sampling/sampling_logp_difference/mean": 0.02283666282892227, "step": 1571 }, { "clip_ratio/high_max": 0.005592059344053268, "clip_ratio/high_mean": 0.0012221367796882987, "clip_ratio/low_mean": 0.0023914973717182875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036136338021606207, "entropy": 1.3446195125579834, "epoch": 10.767123287671232, "grad_norm": 1.3548350468541572, "kl": 1.2087949514389038, "learning_rate": 2.3099315068493148e-07, "loss": 0.0475, "step": 1572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 165.1428680419922, "completions/mean_terminated_length": 165.1428680419922, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 1.5388177633285522, "epoch": 10.773972602739725, "frac_reward_zero_std": 0.0, "grad_norm": 1.7121222686545818, "kl": 1.3318309783935547, "learning_rate": 2.3082191780821916e-07, "loss": 0.1099, "num_tokens": 16749258.0, "reward": 0.9487397074699402, "reward_std": 0.05228050798177719, "rewards/check_gptzero_func/mean": 0.9487396478652954, "rewards/check_gptzero_func/std": 0.1554194539785385, "sampling/importance_sampling_ratio/max": 1.3325207233428955, "sampling/importance_sampling_ratio/mean": 1.000187873840332, "sampling/importance_sampling_ratio/min": 0.7670467495918274, "sampling/sampling_logp_difference/max": 0.28707242012023926, "sampling/sampling_logp_difference/mean": 0.022662272676825523, "step": 1573 }, { "clip_ratio/high_max": 0.006666666828095913, "clip_ratio/high_mean": 0.0014454452320933342, "clip_ratio/low_mean": 0.0014748370740562677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002920282306149602, "entropy": 1.5421687364578247, "epoch": 10.780821917808218, "grad_norm": 1.6056392502153434, "kl": 1.2957351207733154, "learning_rate": 2.3065068493150684e-07, "loss": 0.1031, "step": 1574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714328289032, "completions/max_length": 3000.0, "completions/max_terminated_length": 1840.0, "completions/mean_length": 594.5535888671875, "completions/mean_terminated_length": 458.396240234375, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 1.3529417514801025, "epoch": 10.787671232876713, "frac_reward_zero_std": 0.0, "grad_norm": 1.0505421551048013, "kl": 0.6727092862129211, "learning_rate": 2.3047945205479452e-07, "loss": -0.0309, "num_tokens": 16787091.0, "reward": 0.8659379482269287, "reward_std": 0.05246991291642189, "rewards/check_gptzero_func/mean": 0.8659378886222839, "rewards/check_gptzero_func/std": 0.2193090170621872, "sampling/importance_sampling_ratio/max": 1.443879246711731, "sampling/importance_sampling_ratio/mean": 0.9994645714759827, "sampling/importance_sampling_ratio/min": 0.6798029541969299, "sampling/sampling_logp_difference/max": 0.3859522342681885, "sampling/sampling_logp_difference/mean": 0.025556722655892372, "step": 1575 }, { "clip_ratio/high_max": 0.00420294189825654, "clip_ratio/high_mean": 0.002644740045070648, "clip_ratio/low_mean": 0.001444953610189259, "clip_ratio/low_min": 0.0007485030218958855, "clip_ratio/region_mean": 0.004089694004505873, "entropy": 1.3529417514801025, "epoch": 10.794520547945206, "grad_norm": 1.0151621812519018, "kl": 0.6688400506973267, "learning_rate": 2.303082191780822e-07, "loss": -0.0359, "step": 1576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1974.0, "completions/max_terminated_length": 1974.0, "completions/mean_length": 202.08929443359375, "completions/mean_terminated_length": 202.08929443359375, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.3559950590133667, "epoch": 10.801369863013699, "frac_reward_zero_std": 0.0, "grad_norm": 1.6556628862945584, "kl": 1.2099043130874634, "learning_rate": 2.3013698630136984e-07, "loss": -0.036, "num_tokens": 16802818.0, "reward": 0.9488593935966492, "reward_std": 0.02012084238231182, "rewards/check_gptzero_func/mean": 0.9488593339920044, "rewards/check_gptzero_func/std": 0.16063831746578217, "sampling/importance_sampling_ratio/max": 1.2795803546905518, "sampling/importance_sampling_ratio/mean": 1.0005496740341187, "sampling/importance_sampling_ratio/min": 0.7044907808303833, "sampling/sampling_logp_difference/max": 0.3502800464630127, "sampling/sampling_logp_difference/mean": 0.0222333874553442, "step": 1577 }, { "clip_ratio/high_max": 0.007128309458494186, "clip_ratio/high_mean": 0.0018816519295796752, "clip_ratio/low_mean": 0.001384498318657279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003266150364652276, "entropy": 1.3546664714813232, "epoch": 10.808219178082192, "grad_norm": 1.598132288385215, "kl": 1.2083642482757568, "learning_rate": 2.2996575342465752e-07, "loss": -0.043, "step": 1578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 2210.0, "completions/mean_length": 313.8571472167969, "completions/mean_terminated_length": 265.0181884765625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.3429721593856812, "epoch": 10.815068493150685, "frac_reward_zero_std": 0.0, "grad_norm": 1.4823882662191727, "kl": 1.2093745470046997, "learning_rate": 2.297945205479452e-07, "loss": 0.0516, "num_tokens": 16825154.0, "reward": 0.9021162986755371, "reward_std": 0.04810168221592903, "rewards/check_gptzero_func/mean": 0.9021162986755371, "rewards/check_gptzero_func/std": 0.20269353687763214, "sampling/importance_sampling_ratio/max": 1.3858332633972168, "sampling/importance_sampling_ratio/mean": 1.0011088848114014, "sampling/importance_sampling_ratio/min": 0.6922478675842285, "sampling/sampling_logp_difference/max": 0.3678112030029297, "sampling/sampling_logp_difference/mean": 0.024359198287129402, "step": 1579 }, { "clip_ratio/high_max": 0.00414250185713172, "clip_ratio/high_mean": 0.002098888624459505, "clip_ratio/low_mean": 0.0019405788043513894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004039467312395573, "entropy": 1.343919038772583, "epoch": 10.821917808219178, "grad_norm": 1.3948291409731968, "kl": 1.2041908502578735, "learning_rate": 2.2962328767123287e-07, "loss": 0.045, "step": 1580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 180.48214721679688, "completions/mean_terminated_length": 180.48214721679688, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.3785752058029175, "epoch": 10.82876712328767, "frac_reward_zero_std": 0.0, "grad_norm": 1.7018589465138005, "kl": 1.2155086994171143, "learning_rate": 2.2945205479452055e-07, "loss": 0.0671, "num_tokens": 16839987.0, "reward": 0.9201290011405945, "reward_std": 0.04474605247378349, "rewards/check_gptzero_func/mean": 0.9201289415359497, "rewards/check_gptzero_func/std": 0.19924914836883545, "sampling/importance_sampling_ratio/max": 1.6463526487350464, "sampling/importance_sampling_ratio/mean": 0.9999303817749023, "sampling/importance_sampling_ratio/min": 0.719951868057251, "sampling/sampling_logp_difference/max": 0.4985623359680176, "sampling/sampling_logp_difference/mean": 0.0232398621737957, "step": 1581 }, { "clip_ratio/high_max": 0.009449881501495838, "clip_ratio/high_mean": 0.002866859082132578, "clip_ratio/low_mean": 0.0021705839317291975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005037442781031132, "entropy": 1.3792179822921753, "epoch": 10.835616438356164, "grad_norm": 1.5833470065293518, "kl": 1.2173329591751099, "learning_rate": 2.292808219178082e-07, "loss": 0.0597, "step": 1582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1256.0, "completions/mean_length": 227.21429443359375, "completions/mean_terminated_length": 176.79998779296875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 1.6569091081619263, "epoch": 10.842465753424658, "frac_reward_zero_std": 0.0, "grad_norm": 1.5080684079444873, "kl": 1.113054871559143, "learning_rate": 2.2910958904109588e-07, "loss": 0.2284, "num_tokens": 16857831.0, "reward": 0.9188264012336731, "reward_std": 0.0869026854634285, "rewards/check_gptzero_func/mean": 0.9188264012336731, "rewards/check_gptzero_func/std": 0.21185891330242157, "sampling/importance_sampling_ratio/max": 1.2813690900802612, "sampling/importance_sampling_ratio/mean": 0.9991999864578247, "sampling/importance_sampling_ratio/min": 0.6377596855163574, "sampling/sampling_logp_difference/max": 0.44979381561279297, "sampling/sampling_logp_difference/mean": 0.0236270260065794, "step": 1583 }, { "clip_ratio/high_max": 0.003448275849223137, "clip_ratio/high_mean": 0.0012248356360942125, "clip_ratio/low_mean": 0.0017193995881825686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002944235224276781, "entropy": 1.6567293405532837, "epoch": 10.849315068493151, "grad_norm": 1.405661040143412, "kl": 1.086849570274353, "learning_rate": 2.2893835616438355e-07, "loss": 0.2225, "step": 1584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1583.0, "completions/max_terminated_length": 1583.0, "completions/mean_length": 199.67857360839844, "completions/mean_terminated_length": 199.67857360839844, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.5790960788726807, "epoch": 10.856164383561644, "frac_reward_zero_std": 0.0, "grad_norm": 1.44238553876763, "kl": 1.2041739225387573, "learning_rate": 2.2876712328767123e-07, "loss": -0.0044, "num_tokens": 16874241.0, "reward": 0.9582921862602234, "reward_std": 0.03420354798436165, "rewards/check_gptzero_func/mean": 0.9582921266555786, "rewards/check_gptzero_func/std": 0.11602060496807098, "sampling/importance_sampling_ratio/max": 1.2734694480895996, "sampling/importance_sampling_ratio/mean": 1.0002024173736572, "sampling/importance_sampling_ratio/min": 0.7307537794113159, "sampling/sampling_logp_difference/max": 0.3136787414550781, "sampling/sampling_logp_difference/mean": 0.023082418367266655, "step": 1585 }, { "clip_ratio/high_max": 0.005804749205708504, "clip_ratio/high_mean": 0.001532997703179717, "clip_ratio/low_mean": 0.0007201452972367406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002253142884001136, "entropy": 1.5768269300460815, "epoch": 10.863013698630137, "grad_norm": 1.3902143306748214, "kl": 1.200897216796875, "learning_rate": 2.2859589041095888e-07, "loss": -0.0101, "step": 1586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1665.0, "completions/mean_length": 312.375, "completions/mean_terminated_length": 263.50909423828125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.3064210414886475, "epoch": 10.86986301369863, "frac_reward_zero_std": 0.0, "grad_norm": 1.3212133875599201, "kl": 1.2141377925872803, "learning_rate": 2.2842465753424656e-07, "loss": 0.1645, "num_tokens": 16896726.0, "reward": 0.9254463911056519, "reward_std": 0.036847639828920364, "rewards/check_gptzero_func/mean": 0.9254463315010071, "rewards/check_gptzero_func/std": 0.159530371427536, "sampling/importance_sampling_ratio/max": 1.4486031532287598, "sampling/importance_sampling_ratio/mean": 0.9998269081115723, "sampling/importance_sampling_ratio/min": 0.6962096095085144, "sampling/sampling_logp_difference/max": 0.37059974670410156, "sampling/sampling_logp_difference/mean": 0.020661190152168274, "step": 1587 }, { "clip_ratio/high_max": 0.00390103692188859, "clip_ratio/high_mean": 0.0012658440973609686, "clip_ratio/low_mean": 0.0008937479578889906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021595919970422983, "entropy": 1.3064210414886475, "epoch": 10.876712328767123, "grad_norm": 1.2691750873207859, "kl": 1.213957667350769, "learning_rate": 2.2825342465753426e-07, "loss": 0.159, "step": 1588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2075.0, "completions/max_terminated_length": 2075.0, "completions/mean_length": 200.50001525878906, "completions/mean_terminated_length": 200.50001525878906, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.4612791538238525, "epoch": 10.883561643835616, "frac_reward_zero_std": 0.0, "grad_norm": 1.5641271238032015, "kl": 1.2516165971755981, "learning_rate": 2.280821917808219e-07, "loss": 0.1079, "num_tokens": 16912910.0, "reward": 0.9365888237953186, "reward_std": 0.06619951128959656, "rewards/check_gptzero_func/mean": 0.9365887641906738, "rewards/check_gptzero_func/std": 0.20245890319347382, "sampling/importance_sampling_ratio/max": 1.5580896139144897, "sampling/importance_sampling_ratio/mean": 1.000414252281189, "sampling/importance_sampling_ratio/min": 0.6999406218528748, "sampling/sampling_logp_difference/max": 0.44346046447753906, "sampling/sampling_logp_difference/mean": 0.022668667137622833, "step": 1589 }, { "clip_ratio/high_max": 0.010540788061916828, "clip_ratio/high_mean": 0.0017907964065670967, "clip_ratio/low_mean": 0.0014129711780697107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032037675846368074, "entropy": 1.4612791538238525, "epoch": 10.89041095890411, "grad_norm": 1.461443545204823, "kl": 1.2509351968765259, "learning_rate": 2.279109589041096e-07, "loss": 0.1019, "step": 1590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1826.0, "completions/mean_length": 359.26788330078125, "completions/mean_terminated_length": 311.2545471191406, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.3723812103271484, "epoch": 10.897260273972602, "frac_reward_zero_std": 0.0, "grad_norm": 1.310341767784972, "kl": 1.1379894018173218, "learning_rate": 2.2773972602739724e-07, "loss": -0.0459, "num_tokens": 16937859.0, "reward": 0.8919155597686768, "reward_std": 0.06490255892276764, "rewards/check_gptzero_func/mean": 0.891915500164032, "rewards/check_gptzero_func/std": 0.22513721883296967, "sampling/importance_sampling_ratio/max": 1.4062896966934204, "sampling/importance_sampling_ratio/mean": 0.9997935891151428, "sampling/importance_sampling_ratio/min": 0.6285502910614014, "sampling/sampling_logp_difference/max": 0.4643392562866211, "sampling/sampling_logp_difference/mean": 0.022382449358701706, "step": 1591 }, { "clip_ratio/high_max": 0.0039100684225559235, "clip_ratio/high_mean": 0.0015208822442218661, "clip_ratio/low_mean": 0.0010334623511880636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002554344478994608, "entropy": 1.3745580911636353, "epoch": 10.904109589041095, "grad_norm": 1.2495475909688079, "kl": 1.1336407661437988, "learning_rate": 2.2756849315068492e-07, "loss": -0.0513, "step": 1592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01785714365541935, "completions/max_length": 3000.0, "completions/max_terminated_length": 1723.0, "completions/mean_length": 240.8035888671875, "completions/mean_terminated_length": 190.6363525390625, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 1.481595754623413, "epoch": 10.91095890410959, "frac_reward_zero_std": 0.0, "grad_norm": 1.5033321814060772, "kl": 1.2842237949371338, "learning_rate": 2.273972602739726e-07, "loss": 0.1526, "num_tokens": 16957048.0, "reward": 0.9276373982429504, "reward_std": 0.059449970722198486, "rewards/check_gptzero_func/mean": 0.92763751745224, "rewards/check_gptzero_func/std": 0.19715970754623413, "sampling/importance_sampling_ratio/max": 1.3474782705307007, "sampling/importance_sampling_ratio/mean": 1.0001500844955444, "sampling/importance_sampling_ratio/min": 0.7095062732696533, "sampling/sampling_logp_difference/max": 0.3431859016418457, "sampling/sampling_logp_difference/mean": 0.022183779627084732, "step": 1593 }, { "clip_ratio/high_max": 0.005293005611747503, "clip_ratio/high_mean": 0.001950928708538413, "clip_ratio/low_mean": 0.0010904044611379504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030413330532610416, "entropy": 1.4799352884292603, "epoch": 10.917808219178083, "grad_norm": 1.4632720737453424, "kl": 1.2811166048049927, "learning_rate": 2.2722602739726027e-07, "loss": 0.146, "step": 1594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2120.0, "completions/max_terminated_length": 2120.0, "completions/mean_length": 376.5535888671875, "completions/mean_terminated_length": 376.5535888671875, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 1.4520196914672852, "epoch": 10.924657534246576, "frac_reward_zero_std": 0.0, "grad_norm": 1.2537600890359992, "kl": 0.9753460884094238, "learning_rate": 2.2705479452054792e-07, "loss": -0.0058, "num_tokens": 16983273.0, "reward": 0.9041624665260315, "reward_std": 0.020394397899508476, "rewards/check_gptzero_func/mean": 0.9041624069213867, "rewards/check_gptzero_func/std": 0.19484148919582367, "sampling/importance_sampling_ratio/max": 1.326276421546936, "sampling/importance_sampling_ratio/mean": 0.9999089241027832, "sampling/importance_sampling_ratio/min": 0.5587427616119385, "sampling/sampling_logp_difference/max": 0.5820660591125488, "sampling/sampling_logp_difference/mean": 0.025533460080623627, "step": 1595 }, { "clip_ratio/high_max": 0.006404174491763115, "clip_ratio/high_mean": 0.0030670894775539637, "clip_ratio/low_mean": 0.001609299099072814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00467638811096549, "entropy": 1.4551762342453003, "epoch": 10.931506849315069, "grad_norm": 1.1785940851140462, "kl": 0.9685202836990356, "learning_rate": 2.268835616438356e-07, "loss": -0.0117, "step": 1596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 285.83929443359375, "completions/mean_terminated_length": 285.83929443359375, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "entropy": 1.3716166019439697, "epoch": 10.938356164383562, "frac_reward_zero_std": 0.0, "grad_norm": 1.7331053862947707, "kl": 1.0475562810897827, "learning_rate": 2.267123287671233e-07, "loss": 0.052, "num_tokens": 17004436.0, "reward": 0.8746706247329712, "reward_std": 0.04903999716043472, "rewards/check_gptzero_func/mean": 0.8746705651283264, "rewards/check_gptzero_func/std": 0.25580736994743347, "sampling/importance_sampling_ratio/max": 1.5380969047546387, "sampling/importance_sampling_ratio/mean": 1.000164270401001, "sampling/importance_sampling_ratio/min": 0.6865748763084412, "sampling/sampling_logp_difference/max": 0.4305458068847656, "sampling/sampling_logp_difference/mean": 0.023512782528996468, "step": 1597 }, { "clip_ratio/high_max": 0.00788091029971838, "clip_ratio/high_mean": 0.00284756300970912, "clip_ratio/low_mean": 0.0018469608621671796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0046945237554609776, "entropy": 1.368653416633606, "epoch": 10.945205479452055, "grad_norm": 1.385017559237226, "kl": 1.0450375080108643, "learning_rate": 2.2654109589041095e-07, "loss": 0.0459, "step": 1598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2938.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 407.1785888671875, "completions/mean_terminated_length": 407.1785888671875, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "entropy": 1.4925076961517334, "epoch": 10.952054794520548, "frac_reward_zero_std": 0.0, "grad_norm": 1.372926941484728, "kl": 0.9209632873535156, "learning_rate": 2.2636986301369863e-07, "loss": -0.0002, "num_tokens": 17032158.0, "reward": 0.8793509602546692, "reward_std": 0.06598439812660217, "rewards/check_gptzero_func/mean": 0.8793509602546692, "rewards/check_gptzero_func/std": 0.2327076643705368, "sampling/importance_sampling_ratio/max": 1.505175232887268, "sampling/importance_sampling_ratio/mean": 0.999523937702179, "sampling/importance_sampling_ratio/min": 0.4076354205608368, "sampling/sampling_logp_difference/max": 0.8973820209503174, "sampling/sampling_logp_difference/mean": 0.025584984570741653, "step": 1599 }, { "clip_ratio/high_max": 0.0055741360411047935, "clip_ratio/high_mean": 0.002604082226753235, "clip_ratio/low_mean": 0.0017672901740297675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004371372517198324, "entropy": 1.4942313432693481, "epoch": 10.95890410958904, "grad_norm": 1.321563894826677, "kl": 0.9251036047935486, "learning_rate": 2.2619863013698628e-07, "loss": -0.0059, "step": 1600 } ], "logging_steps": 1, "max_steps": 2920, "num_input_tokens_seen": 17032158, "num_train_epochs": 20, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }